Diffstat (limited to 'contrib/llvm/lib/CodeGen')
219 files changed, 171548 insertions, 0 deletions
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp new file mode 100644 index 0000000..4060db7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -0,0 +1,971 @@ +//===----- AggressiveAntiDepBreaker.cpp - Anti-dep breaker ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AggressiveAntiDepBreaker class, which +// implements register anti-dependence breaking during post-RA +// scheduling. It attempts to break all anti-dependencies within a +// block. +// +//===----------------------------------------------------------------------===// + +#include "AggressiveAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "post-RA-sched" + +// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("agg-antidep-debugdiv", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("agg-antidep-debugmod", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); + +AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs, + MachineBasicBlock *BB) : + NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0), + GroupNodeIndices(TargetRegs, 0), + KillIndices(TargetRegs, 0), + DefIndices(TargetRegs, 0) +{ + const unsigned BBSize = BB->size(); + for (unsigned i = 0; i < NumTargetRegs; ++i) { + // Initialize all registers to be in their own group. Initially we + // assign the register to the same-indexed GroupNode. + GroupNodeIndices[i] = i; + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } +} + +unsigned AggressiveAntiDepState::GetGroup(unsigned Reg) { + unsigned Node = GroupNodeIndices[Reg]; + while (GroupNodes[Node] != Node) + Node = GroupNodes[Node]; + + return Node; +} + +void AggressiveAntiDepState::GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs) +{ + for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) { + if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0)) + Regs.push_back(Reg); + } +} + +unsigned AggressiveAntiDepState::UnionGroups(unsigned Reg1, unsigned Reg2) +{ + assert(GroupNodes[0] == 0 && "GroupNode 0 not parent!"); + assert(GroupNodeIndices[0] == 0 && "Reg 0 not in Group 0!"); + + // find group for each register + unsigned Group1 = GetGroup(Reg1); + unsigned Group2 = GetGroup(Reg2); + + // if either group is 0, then that must become the parent + unsigned Parent = (Group1 == 0) ? Group1 : Group2; + unsigned Other = (Parent == Group1) ? Group2 : Group1; + GroupNodes.at(Other) = Parent; + return Parent; +} + +unsigned AggressiveAntiDepState::LeaveGroup(unsigned Reg) +{ + // Create a new GroupNode for Reg. 
Reg's existing GroupNode must + // stay as is because there could be other GroupNodes referring to + // it. + unsigned idx = GroupNodes.size(); + GroupNodes.push_back(idx); + GroupNodeIndices[Reg] = idx; + return idx; +} + +bool AggressiveAntiDepState::IsLive(unsigned Reg) +{ + // KillIndex must be defined and DefIndex not defined for a register + // to be live. + return((KillIndices[Reg] != ~0u) && (DefIndices[Reg] == ~0u)); +} + +AggressiveAntiDepBreaker::AggressiveAntiDepBreaker( + MachineFunction &MFi, const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) + : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getSubtarget().getInstrInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), + State(nullptr) { + /* Collect a bitset of all registers that are only broken if they + are on the critical path. */ + for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { + BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]); + if (CriticalPathSet.none()) + CriticalPathSet = CPSet; + else + CriticalPathSet |= CPSet; + } + + DEBUG(dbgs() << "AntiDep Critical-Path Registers:"); + DEBUG(for (int r = CriticalPathSet.find_first(); r != -1; + r = CriticalPathSet.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << '\n'); +} + +AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { + delete State; +} + +void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + assert(!State); + State = new AggressiveAntiDepState(TRI->getNumRegs(), BB); + + bool IsReturnBlock = BB->isReturnBlock(); + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(MF); + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + unsigned Reg = *I; + if (!IsReturnBlock && !Pristine.test(Reg)) continue; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; + } + } +} + +void AggressiveAntiDepBreaker::FinishBlock() { + delete State; + State = nullptr; +} + +void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + std::set<unsigned> PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + PrescanInstruction(MI, Count, PassthruRegs); + ScanInstruction(MI, Count); + + DEBUG(dbgs() << "Observe: "); + DEBUG(MI->dump()); + DEBUG(dbgs() << "\tRegs:"); + + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + // If Reg is current live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). 
If it is not live but was defined in the + // previous schedule region, then set its def index to the most + // conservative location (i.e. the beginning of the previous + // schedule region). + if (State->IsLive(Reg)) { + DEBUG(if (State->GetGroup(Reg) != 0) + dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg) << "->g0(region live-out)"); + State->UnionGroups(Reg, 0); + } else if ((DefIndices[Reg] < InsertPosIndex) + && (DefIndices[Reg] >= Count)) { + DefIndices[Reg] = Count; + } + } + DEBUG(dbgs() << '\n'); +} + +bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, + MachineOperand& MO) +{ + if (!MO.isReg() || !MO.isImplicit()) + return false; + + unsigned Reg = MO.getReg(); + if (Reg == 0) + return false; + + MachineOperand *Op = nullptr; + if (MO.isDef()) + Op = MI->findRegisterUseOperand(Reg, true); + else + Op = MI->findRegisterDefOperand(Reg); + + return(Op && Op->isImplicit()); +} + +void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, + std::set<unsigned>& PassthruRegs) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || + IsImplicitDefUse(MI, MO)) { + const unsigned Reg = MO.getReg(); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + PassthruRegs.insert(*SubRegs); + } + } +} + +/// AntiDepEdges - Return in Edges the anti- and output- dependencies +/// in SU that we want to consider for breaking. +static void AntiDepEdges(const SUnit *SU, std::vector<const SDep*>& Edges) { + SmallSet<unsigned, 4> RegSet; + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { + if (RegSet.insert(P->getReg()).second) + Edges.push_back(&*P); + } + } +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SUnit *CriticalPathStep(const SUnit *SU) { + const SDep *Next = nullptr; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + if (SU) { + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + } + + return (Next) ? Next->getSUnit() : nullptr; +} + +void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, + const char *tag, + const char *header, + const char *footer) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // FIXME: We must leave subregisters of live super registers as live, so that + // we don't clear out the register tracking information for subregisters of + // super registers we're still tracking (and with which we're unioning + // subregister definitions). 
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) { + DEBUG(if (!header && footer) dbgs() << footer); + return; + } + + if (!State->IsLive(Reg)) { + KillIndices[Reg] = KillIdx; + DefIndices[Reg] = ~0u; + RegRefs.erase(Reg); + State->LeaveGroup(Reg); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << tag); + } + // Repeat for subregisters. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubregReg = *SubRegs; + if (!State->IsLive(SubregReg)) { + KillIndices[SubregReg] = KillIdx; + DefIndices[SubregReg] = ~0u; + RegRefs.erase(SubregReg); + State->LeaveGroup(SubregReg); + DEBUG(if (header) { + dbgs() << header << TRI->getName(Reg); header = nullptr; }); + DEBUG(dbgs() << " " << TRI->getName(SubregReg) << "->g" << + State->GetGroup(SubregReg) << tag); + } + } + + DEBUG(if (!header && footer) dbgs() << footer); +} + +void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, + unsigned Count, + std::set<unsigned>& PassthruRegs) { + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // Handle dead defs by simulating a last-use of the register just + // after the def. A dead def can occur because the def is truly + // dead, or because only a subregister is live at the def. If we + // don't do this the dead def will be incorrectly merged into the + // previous def. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); + } + + DEBUG(dbgs() << "\tDef Groups:"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << State->GetGroup(Reg)); + + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). Inline assembly may + // reference either system calls or the register directly. Skip it until we + // can tell user specified registers from compiler-specified. + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || + TII->isPredicated(MI) || MI->isInlineAsm()) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Any aliased that are live at this point are completely or + // partially defined here, so group those aliases with Reg. + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (State->IsLive(AliasReg)) { + State->UnionGroups(Reg, AliasReg); + DEBUG(dbgs() << "->g" << State->GetGroup(Reg) << "(via " << + TRI->getName(AliasReg) << ")"); + } + } + + // Note register reference... + const TargetRegisterClass *RC = nullptr; + if (i < MI->getDesc().getNumOperands()) + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Scan the register defs for this instruction and update + // live-ranges. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + // Ignore KILLs and passthru registers for liveness... + if (MI->isKill() || (PassthruRegs.count(Reg) != 0)) + continue; + + // Update def for Reg and aliases. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + // We need to be careful here not to define already-live super registers. + // If the super register is already live, then this definition is not + // a definition of the whole super register (just a partial insertion + // into it). Earlier subregister definitions (which we've not yet visited + // because we're iterating bottom-up) need to be linked to the same group + // as this definition. + if (TRI->isSuperRegister(Reg, *AI) && State->IsLive(*AI)) + continue; + + DefIndices[*AI] = Count; + } + } +} + +void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + DEBUG(dbgs() << "\tUse Groups:"); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // Inline Assembly register uses also cannot be safely changed. + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || + TII->isPredicated(MI) || MI->isInlineAsm(); + + // Scan the register uses for this instruction and update + // live-ranges, groups and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + DEBUG(dbgs() << " " << TRI->getName(Reg) << "=g" << + State->GetGroup(Reg)); + + // It wasn't previously live but now it is, this is a kill. Forget + // the previous live-range information and start a new live-range + // for the register. + HandleLastUse(Reg, Count, "(last-use)"); + + if (Special) { + DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); + State->UnionGroups(Reg, 0); + } + + // Note register reference... + const TargetRegisterClass *RC = nullptr; + if (i < MI->getDesc().getNumOperands()) + RC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + AggressiveAntiDepState::RegisterReference RR = { &MO, RC }; + RegRefs.insert(std::make_pair(Reg, RR)); + } + + DEBUG(dbgs() << '\n'); + + // Form a group of all defs and uses of a KILL instruction to ensure + // that all registers are renamed as a group. 
+ if (MI->isKill()) { + DEBUG(dbgs() << "\tKill Group:"); + + unsigned FirstReg = 0; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (FirstReg != 0) { + DEBUG(dbgs() << "=" << TRI->getName(Reg)); + State->UnionGroups(FirstReg, Reg); + } else { + DEBUG(dbgs() << " " << TRI->getName(Reg)); + FirstReg = Reg; + } + } + + DEBUG(dbgs() << "->g" << State->GetGroup(FirstReg) << '\n'); + } +} + +BitVector AggressiveAntiDepBreaker::GetRenameRegisters(unsigned Reg) { + BitVector BV(TRI->getNumRegs(), false); + bool first = true; + + // Check all references that need rewriting for Reg. For each, use + // the corresponding register class to narrow the set of registers + // that are appropriate for renaming. + for (const auto &Q : make_range(State->GetRegRefs().equal_range(Reg))) { + const TargetRegisterClass *RC = Q.second.RC; + if (!RC) continue; + + BitVector RCBV = TRI->getAllocatableSet(MF, RC); + if (first) { + BV |= RCBV; + first = false; + } else { + BV &= RCBV; + } + + DEBUG(dbgs() << " " << TRI->getRegClassName(RC)); + } + + return BV; +} + +bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( + unsigned AntiDepGroupIndex, + RenameOrderType& RenameOrder, + std::map<unsigned, unsigned> &RenameMap) { + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // Collect all referenced registers in the same group as + // AntiDepReg. These all need to be renamed together if we are to + // break the anti-dependence. + std::vector<unsigned> Regs; + State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs); + assert(Regs.size() > 0 && "Empty register group!"); + if (Regs.size() == 0) + return false; + + // Find the "superest" register in the group. At the same time, + // collect the BitVector of registers that can be used to rename + // each register. + DEBUG(dbgs() << "\tRename Candidates for Group g" << AntiDepGroupIndex + << ":\n"); + std::map<unsigned, BitVector> RenameRegisterMap; + unsigned SuperReg = 0; + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if ((SuperReg == 0) || TRI->isSuperRegister(SuperReg, Reg)) + SuperReg = Reg; + + // If Reg has any references, then collect possible rename regs + if (RegRefs.count(Reg) > 0) { + DEBUG(dbgs() << "\t\t" << TRI->getName(Reg) << ":"); + + BitVector BV = GetRenameRegisters(Reg); + RenameRegisterMap.insert(std::pair<unsigned, BitVector>(Reg, BV)); + + DEBUG(dbgs() << " ::"); + DEBUG(for (int r = BV.find_first(); r != -1; r = BV.find_next(r)) + dbgs() << " " << TRI->getName(r)); + DEBUG(dbgs() << "\n"); + } + } + + // All group registers should be a subreg of SuperReg. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + if (Reg == SuperReg) continue; + bool IsSub = TRI->isSubRegister(SuperReg, Reg); + // FIXME: remove this once PR18663 has been properly fixed. 
For now, + // return a conservative answer: + // assert(IsSub && "Expecting group subregister"); + if (!IsSub) + return false; + } + +#ifndef NDEBUG + // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int renamecnt = 0; + if (renamecnt++ % DebugDiv != DebugMod) + return false; + + dbgs() << "*** Performing rename " << TRI->getName(SuperReg) << + " for debug ***\n"; + } +#endif + + // Check each possible rename register for SuperReg in round-robin + // order. If that register is available, and the corresponding + // registers are available for the other group subregisters, then we + // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. + const TargetRegisterClass *SuperRC = + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); + + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(SuperRC); + if (Order.empty()) { + DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); + return false; + } + + DEBUG(dbgs() << "\tFind Registers:"); + + RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size())); + + unsigned OrigR = RenameOrder[SuperRC]; + unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR); + unsigned R = OrigR; + do { + if (R == 0) R = Order.size(); + --R; + const unsigned NewSuperReg = Order[R]; + // Don't consider non-allocatable registers + if (!MRI.isAllocatable(NewSuperReg)) continue; + // Don't replace a register with itself. + if (NewSuperReg == SuperReg) continue; + + DEBUG(dbgs() << " [" << TRI->getName(NewSuperReg) << ':'); + RenameMap.clear(); + + // For each referenced group register (which must be a SuperReg or + // a subregister of SuperReg), find the corresponding subregister + // of NewSuperReg and make sure it is free to be renamed. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + unsigned NewReg = 0; + if (Reg == SuperReg) { + NewReg = NewSuperReg; + } else { + unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg); + if (NewSubRegIdx != 0) + NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); + } + + DEBUG(dbgs() << " " << TRI->getName(NewReg)); + + // Check if Reg can be renamed to NewReg. + BitVector BV = RenameRegisterMap[Reg]; + if (!BV.test(NewReg)) { + DEBUG(dbgs() << "(no rename)"); + goto next_super_reg; + } + + // If NewReg is dead and NewReg's most recent def is not before + // Regs's kill, it's safe to replace Reg with NewReg. We + // must also check all aliases of NewReg, because we can't define a + // register when any sub or super is already live. + if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { + DEBUG(dbgs() << "(live)"); + goto next_super_reg; + } else { + bool found = false; + for (MCRegAliasIterator AI(NewReg, TRI, false); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (State->IsLive(AliasReg) || + (KillIndices[Reg] > DefIndices[AliasReg])) { + DEBUG(dbgs() << "(alias " << TRI->getName(AliasReg) << " live)"); + found = true; + break; + } + } + if (found) + goto next_super_reg; + } + + // We cannot rename 'Reg' to 'NewReg' if one of the uses of 'Reg' also + // defines 'NewReg' via an early-clobber operand. 
+ for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + MachineInstr *UseMI = Q.second.Operand->getParent(); + int Idx = UseMI->findRegisterDefOperandIdx(NewReg, false, true, TRI); + if (Idx == -1) + continue; + + if (UseMI->getOperand(Idx).isEarlyClobber()) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + + // Also, we cannot rename 'Reg' to 'NewReg' if the instruction defining + // 'Reg' is an early-clobber define and that instruction also uses + // 'NewReg'. + for (const auto &Q : make_range(RegRefs.equal_range(Reg))) { + if (!Q.second.Operand->isDef() || !Q.second.Operand->isEarlyClobber()) + continue; + + MachineInstr *DefMI = Q.second.Operand->getParent(); + if (DefMI->readsRegister(NewReg, TRI)) { + DEBUG(dbgs() << "(ec)"); + goto next_super_reg; + } + } + + // Record that 'Reg' can be renamed to 'NewReg'. + RenameMap.insert(std::pair<unsigned, unsigned>(Reg, NewReg)); + } + + // If we fall-out here, then every register in the group can be + // renamed, as recorded in RenameMap. + RenameOrder.erase(SuperRC); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); + DEBUG(dbgs() << "]\n"); + return true; + + next_super_reg: + DEBUG(dbgs() << ']'); + } while (R != EndR); + + DEBUG(dbgs() << '\n'); + + // No registers are free and available! + return false; +} + +/// BreakAntiDependencies - Identifiy anti-dependencies within the +/// ScheduleDAG and break them by renaming registers. +/// +unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( + const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + + std::vector<unsigned> &KillIndices = State->GetKillIndices(); + std::vector<unsigned> &DefIndices = State->GetDefIndices(); + std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>& + RegRefs = State->GetRegRefs(); + + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // For each regclass the next register to use for renaming. + RenameOrderType RenameOrder; + + // ...need a map from MI to SUnit. + std::map<MachineInstr *, const SUnit *> MISUnitMap; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap.insert(std::pair<MachineInstr *, const SUnit *>(SU->getInstr(), + SU)); + } + + // Track progress along the critical path through the SUnit graph as + // we walk the instructions. This is needed for regclasses that only + // break critical-path anti-dependencies. + const SUnit *CriticalPathSU = nullptr; + MachineInstr *CriticalPathMI = nullptr; + if (CriticalPathSet.any()) { + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + if (!CriticalPathSU || + ((SU->getDepth() + SU->Latency) > + (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) { + CriticalPathSU = SU; + } + } + + CriticalPathMI = CriticalPathSU->getInstr(); + } + +#ifndef NDEBUG + DEBUG(dbgs() << "\n===== Aggressive anti-dependency breaking\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (!State->IsLive(Reg)) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); +#endif + + // Attempt to break anti-dependence edges. Walk the instructions + // from the bottom up, tracking information about liveness as we go + // to help determine which registers are available. 
+ unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; + I != E; --Count) { + MachineInstr *MI = --I; + + if (MI->isDebugValue()) + continue; + + DEBUG(dbgs() << "Anti: "); + DEBUG(MI->dump()); + + std::set<unsigned> PassthruRegs; + GetPassthruRegs(MI, PassthruRegs); + + // Process the defs in MI... + PrescanInstruction(MI, Count, PassthruRegs); + + // The dependence edges that represent anti- and output- + // dependencies that are candidates for breaking. + std::vector<const SDep *> Edges; + const SUnit *PathSU = MISUnitMap[MI]; + AntiDepEdges(PathSU, Edges); + + // If MI is not on the critical path, then we don't rename + // registers in the CriticalPathSet. + BitVector *ExcludeRegs = nullptr; + if (MI == CriticalPathMI) { + CriticalPathSU = CriticalPathStep(CriticalPathSU); + CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : nullptr; + } else if (CriticalPathSet.any()) { + ExcludeRegs = &CriticalPathSet; + } + + // Ignore KILL instructions (they form a group in ScanInstruction + // but don't cause any anti-dependence breaking themselves) + if (!MI->isKill()) { + // Attempt to break each anti-dependency... + for (unsigned i = 0, e = Edges.size(); i != e; ++i) { + const SDep *Edge = Edges[i]; + SUnit *NextSU = Edge->getSUnit(); + + if ((Edge->getKind() != SDep::Anti) && + (Edge->getKind() != SDep::Output)) continue; + + unsigned AntiDepReg = Edge->getReg(); + DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg)); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + + if (!MRI.isAllocatable(AntiDepReg)) { + // Don't break anti-dependencies on non-allocatable registers. + DEBUG(dbgs() << " (non-allocatable)\n"); + continue; + } else if (ExcludeRegs && ExcludeRegs->test(AntiDepReg)) { + // Don't break anti-dependencies for critical path registers + // if not on the critical path + DEBUG(dbgs() << " (not critical-path)\n"); + continue; + } else if (PassthruRegs.count(AntiDepReg) != 0) { + // If the anti-dep register liveness "passes-thru", then + // don't try to change it. It will be changed along with + // the use if required to break an earlier antidep. + DEBUG(dbgs() << " (passthru)\n"); + continue; + } else { + // No anti-dep breaking for implicit deps + MachineOperand *AntiDepOp = MI->findRegisterDefOperand(AntiDepReg); + assert(AntiDepOp && "Can't find index for defined register operand"); + if (!AntiDepOp || AntiDepOp->isImplicit()) { + DEBUG(dbgs() << " (implicit)\n"); + continue; + } + + // If the SUnit has other dependencies on the SUnit that + // it anti-depends on, don't bother breaking the + // anti-dependency since those edges would prevent such + // units from being scheduled past each other + // regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if (P->getSUnit() == NextSU ? 
+ (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + for (SUnit::const_pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && + (P->getKind() != SDep::Output)) { + DEBUG(dbgs() << " (real dependency)\n"); + AntiDepReg = 0; + break; + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && + (P->getReg() == AntiDepReg)) { + DEBUG(dbgs() << " (other dependency)\n"); + AntiDepReg = 0; + break; + } + } + + if (AntiDepReg == 0) continue; + } + + assert(AntiDepReg != 0); + if (AntiDepReg == 0) continue; + + // Determine AntiDepReg's register group. + const unsigned GroupIndex = State->GetGroup(AntiDepReg); + if (GroupIndex == 0) { + DEBUG(dbgs() << " (zero group)\n"); + continue; + } + + DEBUG(dbgs() << '\n'); + + // Look for a suitable register to use to break the anti-dependence. + std::map<unsigned, unsigned> RenameMap; + if (FindSuitableFreeRegisters(GroupIndex, RenameOrder, RenameMap)) { + DEBUG(dbgs() << "\tBreaking anti-dependence edge on " + << TRI->getName(AntiDepReg) << ":"); + + // Handle each group register... + for (std::map<unsigned, unsigned>::iterator + S = RenameMap.begin(), E = RenameMap.end(); S != E; ++S) { + unsigned CurrReg = S->first; + unsigned NewReg = S->second; + + DEBUG(dbgs() << " " << TRI->getName(CurrReg) << "->" << + TRI->getName(NewReg) << "(" << + RegRefs.count(CurrReg) << " refs)"); + + // Update the references to the old register CurrReg to + // refer to the new register NewReg. + for (const auto &Q : make_range(RegRefs.equal_range(CurrReg))) { + Q.second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q.second.Operand->getParent()]; + if (!SU) continue; + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q.second.Operand->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + } + + // We just went back in time and modified history; the + // liveness information for CurrReg is now inconsistent. Set + // the state as if it were dead. + State->UnionGroups(NewReg, 0); + RegRefs.erase(NewReg); + DefIndices[NewReg] = DefIndices[CurrReg]; + KillIndices[NewReg] = KillIndices[CurrReg]; + + State->UnionGroups(CurrReg, 0); + RegRefs.erase(CurrReg); + DefIndices[CurrReg] = KillIndices[CurrReg]; + KillIndices[CurrReg] = ~0u; + assert(((KillIndices[CurrReg] == ~0u) != + (DefIndices[CurrReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + } + + ++Broken; + DEBUG(dbgs() << '\n'); + } + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h new file mode 100644 index 0000000..eba7383 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -0,0 +1,179 @@ +//=- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the AggressiveAntiDepBreaker class, which +// implements register anti-dependence breaking during post-RA +// scheduling. It attempts to break all anti-dependencies within a +// block. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H + +#include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <map> + +namespace llvm { +class RegisterClassInfo; + + /// Contains all the state necessary for anti-dep breaking. +class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepState { + public: + /// Information about a register reference within a liverange + typedef struct { + /// The registers operand + MachineOperand *Operand; + /// The register class + const TargetRegisterClass *RC; + } RegisterReference; + + private: + /// Number of non-virtual target registers (i.e. TRI->getNumRegs()). + const unsigned NumTargetRegs; + + /// Implements a disjoint-union data structure to + /// form register groups. A node is represented by an index into + /// the vector. A node can "point to" itself to indicate that it + /// is the parent of a group, or point to another node to indicate + /// that it is a member of the same group as that node. + std::vector<unsigned> GroupNodes; + + /// For each register, the index of the GroupNode + /// currently representing the group that the register belongs to. + /// Register 0 is always represented by the 0 group, a group + /// composed of registers that are not eligible for anti-aliasing. + std::vector<unsigned> GroupNodeIndices; + + /// Map registers to all their references within a live range. + std::multimap<unsigned, RegisterReference> RegRefs; + + /// The index of the most recent kill (proceeding bottom-up), + /// or ~0u if the register is not live. + std::vector<unsigned> KillIndices; + + /// The index of the most recent complete def (proceeding bottom + /// up), or ~0u if the register is live. + std::vector<unsigned> DefIndices; + + public: + AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB); + + /// Return the kill indices. + std::vector<unsigned> &GetKillIndices() { return KillIndices; } + + /// Return the define indices. + std::vector<unsigned> &GetDefIndices() { return DefIndices; } + + /// Return the RegRefs map. + std::multimap<unsigned, RegisterReference>& GetRegRefs() { return RegRefs; } + + // Get the group for a register. The returned value is + // the index of the GroupNode representing the group. + unsigned GetGroup(unsigned Reg); + + // Return a vector of the registers belonging to a group. + // If RegRefs is non-NULL then only included referenced registers. + void GetGroupRegs( + unsigned Group, + std::vector<unsigned> &Regs, + std::multimap<unsigned, + AggressiveAntiDepState::RegisterReference> *RegRefs); + + // Union Reg1's and Reg2's groups to form a new group. + // Return the index of the GroupNode representing the group. 
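  // Editor's note (illustrative, not part of the upstream header):
  // GroupNodes/GroupNodeIndices form a simple union-find. GetGroup(Reg)
  // chases parent links until it reaches a self-referential node, and
  // UnionGroups links the two roots, so that after UnionGroups(R1, R2) both
  // GetGroup(R1) and GetGroup(R2) resolve to the same index. Group 0 stays
  // reserved for registers that must not be renamed.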
+ unsigned UnionGroups(unsigned Reg1, unsigned Reg2); + + // Remove a register from its current group and place + // it alone in its own group. Return the index of the GroupNode + // representing the registers new group. + unsigned LeaveGroup(unsigned Reg); + + /// Return true if Reg is live. + bool IsLive(unsigned Reg); + }; + + class LLVM_LIBRARY_VISIBILITY AggressiveAntiDepBreaker + : public AntiDepBreaker { + MachineFunction& MF; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; + + /// The set of registers that should only be + /// renamed if they are on the critical path. + BitVector CriticalPathSet; + + /// The state used to identify and rename anti-dependence registers. + AggressiveAntiDepState *State; + + public: + AggressiveAntiDepBreaker(MachineFunction& MFi, + const RegisterClassInfo &RCI, + TargetSubtargetInfo::RegClassVector& CriticalPathRCs); + ~AggressiveAntiDepBreaker() override; + + /// Initialize anti-dep breaking for a new basic block. + void StartBlock(MachineBasicBlock *BB) override; + + /// Identifiy anti-dependencies along the critical path + /// of the ScheduleDAG and break them by renaming registers. + /// + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) override; + + /// Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; + + /// Finish anti-dep breaking for a basic block. + void FinishBlock() override; + + private: + /// Keep track of a position in the allocation order for each regclass. + typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType; + + /// Return true if MO represents a register + /// that is both implicitly used and defined in MI + bool IsImplicitDefUse(MachineInstr *MI, MachineOperand& MO); + + /// If MI implicitly def/uses a register, then + /// return that register and all subregisters. + void GetPassthruRegs(MachineInstr *MI, std::set<unsigned>& PassthruRegs); + + void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, + const char *header = nullptr, + const char *footer = nullptr); + + void PrescanInstruction(MachineInstr *MI, unsigned Count, + std::set<unsigned>& PassthruRegs); + void ScanInstruction(MachineInstr *MI, unsigned Count); + BitVector GetRenameRegisters(unsigned Reg); + bool FindSuitableFreeRegisters(unsigned AntiDepGroupIndex, + RenameOrderType& RenameOrder, + std::map<unsigned, unsigned> &RenameMap); + }; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp new file mode 100644 index 0000000..40451c0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp @@ -0,0 +1,54 @@ +//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. The AllocationOrder class encapsulates all of that. 
+// +//===----------------------------------------------------------------------===// + +#include "AllocationOrder.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +// Compare VirtRegMap::getRegAllocPref(). +AllocationOrder::AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix) + : Pos(0) { + const MachineFunction &MF = VRM.getMachineFunction(); + const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); + Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM, Matrix); + rewind(); + + DEBUG({ + if (!Hints.empty()) { + dbgs() << "hints:"; + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + dbgs() << ' ' << PrintReg(Hints[I], TRI); + dbgs() << '\n'; + } + }); +#ifndef NDEBUG + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + "Target hint is outside allocation order."); +#endif +} diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h new file mode 100644 index 0000000..2aee3a6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h @@ -0,0 +1,89 @@ +//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an allocation order for virtual registers. +// +// The preferred allocation order for a virtual register depends on allocation +// hints and target hooks. The AllocationOrder class encapsulates all of that. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ALLOCATIONORDER_H +#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCRegisterInfo.h" + +namespace llvm { + +class RegisterClassInfo; +class VirtRegMap; +class LiveRegMatrix; + +class LLVM_LIBRARY_VISIBILITY AllocationOrder { + SmallVector<MCPhysReg, 16> Hints; + ArrayRef<MCPhysReg> Order; + int Pos; + +public: + /// Create a new AllocationOrder for VirtReg. + /// @param VirtReg Virtual register to allocate for. + /// @param VRM Virtual register map for function. + /// @param RegClassInfo Information about reserved and allocatable registers. + AllocationOrder(unsigned VirtReg, + const VirtRegMap &VRM, + const RegisterClassInfo &RegClassInfo, + const LiveRegMatrix *Matrix); + + /// Get the allocation order without reordered hints. + ArrayRef<MCPhysReg> getOrder() const { return Order; } + + /// Return the next physical register in the allocation order, or 0. + /// It is safe to call next() again after it returned 0, it will keep + /// returning 0 until rewind() is called. 
+ unsigned next(unsigned Limit = 0) { + if (Pos < 0) + return Hints.end()[Pos++]; + if (!Limit) + Limit = Order.size(); + while (Pos < int(Limit)) { + unsigned Reg = Order[Pos++]; + if (!isHint(Reg)) + return Reg; + } + return 0; + } + + /// As next(), but allow duplicates to be returned, and stop before the + /// Limit'th register in the RegisterClassInfo allocation order. + /// + /// This can produce more than Limit registers if there are hints. + unsigned nextWithDups(unsigned Limit) { + if (Pos < 0) + return Hints.end()[Pos++]; + if (Pos < int(Limit)) + return Order[Pos++]; + return 0; + } + + /// Start over from the beginning. + void rewind() { Pos = -int(Hints.size()); } + + /// Return true if the last register returned from next() was a preferred register. + bool isHint() const { return Pos <= 0; } + + /// Return true if PhysReg is a preferred register. + bool isHint(unsigned PhysReg) const { + return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end(); + } +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp new file mode 100644 index 0000000..75579a2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -0,0 +1,741 @@ +//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines several CodeGen-specific LLVM IR analysis utilities. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" + +using namespace llvm; + +/// Compute the linearized index of a member in a nested aggregate/struct/array +/// by recursing and accumulating CurIndex as long as there are indices in the +/// index list. +unsigned llvm::ComputeLinearIndex(Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(*EI, nullptr, nullptr, CurIndex); + } + assert(!Indices && "Unexpected out of bound"); + return CurIndex; + } + // Given an array type, recursively traverse the elements. 
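  // Editor's worked example (not part of the upstream source): for a type
  // such as {i32, [2 x i32], i32} the non-aggregate leaves are numbered 0..3
  // in declaration order, so the index list {1, 1} (second element of the
  // array) yields a linear index of 2. Passing a null index list makes the
  // function count leaves, which is how the per-element stride of the array
  // case below is computed.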
+ else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); + unsigned NumElts = ATy->getNumElements(); + // Compute the Linear offset when jumping one element of the array + unsigned EltLinearOffset = ComputeLinearIndex(EltTy, nullptr, nullptr, 0); + if (Indices) { + assert(*Indices < NumElts && "Unexpected out of bound"); + // If the indice is inside the array, compute the index to the requested + // elt and recurse inside the element with the end of the indices list + CurIndex += EltLinearOffset* *Indices; + return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex); + } + CurIndex += EltLinearOffset*NumElts; + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL, + Type *Ty, SmallVectorImpl<EVT> &ValueVTs, + SmallVectorImpl<uint64_t> *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (StructType *STy = dyn_cast<StructType>(Ty)) { + const StructLayout *SL = DL.getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. + if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { + Type *EltTy = ATy->getElementType(); + uint64_t EltSize = DL.getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty->isVoidTy()) + return; + // Base case: we can get an EVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(DL, Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalValue *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalValue *GV = dyn_cast<GlobalValue>(V); + GlobalVariable *Var = dyn_cast<GlobalVariable>(V); + + if (Var && Var->getName() == "llvm.eh.catch.all.value") { + assert(Var->hasInitializer() && + "The EH catch-all value must have an initializer"); + Value *Init = Var->getInitializer(); + GV = dyn_cast<GlobalValue>(Init); + if (!GV) V = cast<ConstantPointerNull>(Init); + } + + assert((GV || isa<ConstantPointerNull>(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being +/// processed uses a memory 'm' constraint. +bool +llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos, + const TargetLowering &TLI) { + for (unsigned i = 0, e = CInfos.size(); i != e; ++i) { + InlineAsm::ConstraintInfo &CI = CInfos[i]; + for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]); + if (CType == TargetLowering::C_Memory) + return true; + } + + // Indirect operand accesses access memory. 
+ if (CI.isIndirect) + return true; + } + + return false; +} + +/// getFCmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR floating-point condition code. This includes +/// consideration of global floating-point math flags. +/// +ISD::CondCode llvm::getFCmpCondCode(FCmpInst::Predicate Pred) { + switch (Pred) { + case FCmpInst::FCMP_FALSE: return ISD::SETFALSE; + case FCmpInst::FCMP_OEQ: return ISD::SETOEQ; + case FCmpInst::FCMP_OGT: return ISD::SETOGT; + case FCmpInst::FCMP_OGE: return ISD::SETOGE; + case FCmpInst::FCMP_OLT: return ISD::SETOLT; + case FCmpInst::FCMP_OLE: return ISD::SETOLE; + case FCmpInst::FCMP_ONE: return ISD::SETONE; + case FCmpInst::FCMP_ORD: return ISD::SETO; + case FCmpInst::FCMP_UNO: return ISD::SETUO; + case FCmpInst::FCMP_UEQ: return ISD::SETUEQ; + case FCmpInst::FCMP_UGT: return ISD::SETUGT; + case FCmpInst::FCMP_UGE: return ISD::SETUGE; + case FCmpInst::FCMP_ULT: return ISD::SETULT; + case FCmpInst::FCMP_ULE: return ISD::SETULE; + case FCmpInst::FCMP_UNE: return ISD::SETUNE; + case FCmpInst::FCMP_TRUE: return ISD::SETTRUE; + default: llvm_unreachable("Invalid FCmp predicate opcode!"); + } +} + +ISD::CondCode llvm::getFCmpCodeWithoutNaN(ISD::CondCode CC) { + switch (CC) { + case ISD::SETOEQ: case ISD::SETUEQ: return ISD::SETEQ; + case ISD::SETONE: case ISD::SETUNE: return ISD::SETNE; + case ISD::SETOLT: case ISD::SETULT: return ISD::SETLT; + case ISD::SETOLE: case ISD::SETULE: return ISD::SETLE; + case ISD::SETOGT: case ISD::SETUGT: return ISD::SETGT; + case ISD::SETOGE: case ISD::SETUGE: return ISD::SETGE; + default: return CC; + } +} + +/// getICmpCondCode - Return the ISD condition code corresponding to +/// the given LLVM IR integer condition code. +/// +ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { + switch (Pred) { + case ICmpInst::ICMP_EQ: return ISD::SETEQ; + case ICmpInst::ICMP_NE: return ISD::SETNE; + case ICmpInst::ICMP_SLE: return ISD::SETLE; + case ICmpInst::ICMP_ULE: return ISD::SETULE; + case ICmpInst::ICMP_SGE: return ISD::SETGE; + case ICmpInst::ICMP_UGE: return ISD::SETUGE; + case ICmpInst::ICMP_SLT: return ISD::SETLT; + case ICmpInst::ICMP_ULT: return ISD::SETULT; + case ICmpInst::ICMP_SGT: return ISD::SETGT; + case ICmpInst::ICMP_UGT: return ISD::SETUGT; + default: + llvm_unreachable("Invalid ICmp predicate opcode!"); + } +} + +static bool isNoopBitcast(Type *T1, Type *T2, + const TargetLoweringBase& TLI) { + return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || + (isa<VectorType>(T1) && isa<VectorType>(T2) && + TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); +} + +/// Look through operations that will be free to find the earliest source of +/// this value. +/// +/// @param ValLoc If V has aggegate type, we will be interested in a particular +/// scalar component. This records its address; the reverse of this list gives a +/// sequence of indices appropriate for an extractvalue to locate the important +/// value. This value is updated during the function and on exit will indicate +/// similar information for the Value returned. +/// +/// @param DataBits If this function looks through truncate instructions, this +/// will record the smallest size attained. +static const Value *getNoopInput(const Value *V, + SmallVectorImpl<unsigned> &ValLoc, + unsigned &DataBits, + const TargetLoweringBase &TLI, + const DataLayout &DL) { + while (true) { + // Try to look through V1; if V1 is not an instruction, it can't be looked + // through. 
+ const Instruction *I = dyn_cast<Instruction>(V); + if (!I || I->getNumOperands() == 0) return V; + const Value *NoopInput = nullptr; + + Value *Op = I->getOperand(0); + if (isa<BitCastInst>(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa<GetElementPtrInst>(I)) { + // Look through getelementptr + if (cast<GetElementPtrInst>(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa<IntToPtrInst>(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + DL.getPointerSizeInBits() == + cast<IntegerType>(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<PtrToIntInst>(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa<VectorType>(I->getType()) && + DL.getPointerSizeInBits() == + cast<IntegerType>(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa<TruncInst>(I) && + TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { + DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + NoopInput = Op; + } else if (isa<CallInst>(I)) { + // Look through call (skipping callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<CallInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } else if (isa<InvokeInst>(I)) { + // Look through invoke (skipping BB, BB, Callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast<InvokeInst>(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; + } + } + } else if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(V)) { + // Value may come from either the aggregate or the scalar + ArrayRef<unsigned> InsertLoc = IVI->getIndices(); + if (ValLoc.size() >= InsertLoc.size() && + std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin())) { + // The type being inserted is a nested sub-type of the aggregate; we + // have to remove those initial indices to get the location we're + // interested in for the operand. + ValLoc.resize(ValLoc.size() - InsertLoc.size()); + NoopInput = IVI->getInsertedValueOperand(); + } else { + // The struct we're inserting into has the value we're interested in, no + // change of address. + NoopInput = Op; + } + } else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(V)) { + // The part we're interested in will inevitably be some sub-section of the + // previous aggregate. Combine the two paths to obtain the true address of + // our element. + ArrayRef<unsigned> ExtractLoc = EVI->getIndices(); + ValLoc.append(ExtractLoc.rbegin(), ExtractLoc.rend()); + NoopInput = Op; + } + // Terminate if we couldn't find anything to look through. + if (!NoopInput) + return V; + + V = NoopInput; + } +} + +/// Return true if this scalar return value only has bits discarded on its path +/// from the "tail call" to the "ret". This includes the obvious noop +/// instructions handled by getNoopInput above as well as free truncations (or +/// extensions prior to the call). 
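// Editor's illustrative example (not part of the upstream source): for IR
// such as
//   %big   = tail call i64 @callee()
//   %small = trunc i64 %big to i32
//   ret i32 %small
// the ret only discards the high bits of the call's result, so the call can
// still qualify as a tail call when the target's allowTruncateForTailCall
// hook accepts the i64 -> i32 truncation.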
+static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, + SmallVectorImpl<unsigned> &RetIndices, + SmallVectorImpl<unsigned> &CallIndices, + bool AllowDifferingSizes, + const TargetLoweringBase &TLI, + const DataLayout &DL) { + + // Trace the sub-value needed by the return value as far back up the graph as + // possible, in the hope that it will intersect with the value produced by the + // call. In the simple case with no "returned" attribute, the hope is actually + // that we end up back at the tail call instruction itself. + unsigned BitsRequired = UINT_MAX; + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI, DL); + + // If this slot in the value returned is undef, it doesn't matter what the + // call puts there, it'll be fine. + if (isa<UndefValue>(RetVal)) + return true; + + // Now do a similar search up through the graph to find where the value + // actually returned by the "tail call" comes from. In the simple case without + // a "returned" attribute, the search will be blocked immediately and the loop + // a Noop. + unsigned BitsProvided = UINT_MAX; + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI, DL); + + // There's no hope if we can't actually trace them to (the same part of!) the + // same value. + if (CallVal != RetVal || CallIndices != RetIndices) + return false; + + // However, intervening truncates may have made the call non-tail. Make sure + // all the bits that are needed by the "ret" have been provided by the "tail + // call". FIXME: with sufficiently cunning bit-tracking, we could look through + // extensions too. + if (BitsProvided < BitsRequired || + (!AllowDifferingSizes && BitsProvided != BitsRequired)) + return false; + + return true; +} + +/// For an aggregate type, determine whether a given index is within bounds or +/// not. +static bool indexReallyValid(CompositeType *T, unsigned Idx) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return Idx < AT->getNumElements(); + + return Idx < cast<StructType>(T)->getNumElements(); +} + +/// Move the given iterators to the next leaf type in depth first traversal. +/// +/// Performs a depth-first traversal of the type as specified by its arguments, +/// stopping at the next leaf node (which may be a legitimate scalar type or an +/// empty struct or array). +/// +/// @param SubTypes List of the partial components making up the type from +/// outermost to innermost non-empty aggregate. The element currently +/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1). +/// +/// @param Path Set of extractvalue indices leading from the outermost type +/// (SubTypes[0]) to the leaf node currently represented. +/// +/// @returns true if a new type was found, false otherwise. Calling this +/// function again on a finished iterator will repeatedly return +/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty +/// aggregate or a non-aggregate +static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First march back up the tree until we can successfully increment one of the + // coordinates in Path. + while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { + Path.pop_back(); + SubTypes.pop_back(); + } + + // If we reached the top, then the iterator is done. + if (Path.empty()) + return false; + + // We know there's *some* valid leaf now, so march back down the tree picking + // out the left-most element at each node. 
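+  // Editorial example (not part of the upstream source): for a type such as
+  // {i32, {i8, i16}}, starting from the i8 leaf we have Path == [1, 0]; the
+  // loop above pops nothing (index 1 is still valid in the inner struct), the
+  // increment below yields Path == [1, 1], and the descent loop stops at the
+  // i16, which is the next leaf.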
+ ++Path.back(); + Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + while (DeeperType->isAggregateType()) { + CompositeType *CT = cast<CompositeType>(DeeperType); + if (!indexReallyValid(CT, 0)) + return true; + + SubTypes.push_back(CT); + Path.push_back(0); + + DeeperType = CT->getTypeAtIndex(0U); + } + + return true; +} + +/// Find the first non-empty, scalar-like type in Next and setup the iterator +/// components. +/// +/// Assuming Next is an aggregate of some kind, this function will traverse the +/// tree from left to right (i.e. depth-first) looking for the first +/// non-aggregate type which will play a role in function return. +/// +/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup +/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first +/// i32 in that type. +static bool firstRealType(Type *Next, + SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First initialise the iterator components to the first "leaf" node + // (i.e. node with no valid sub-type at any index, so {} does count as a leaf + // despite nominally being an aggregate). + while (Next->isAggregateType() && + indexReallyValid(cast<CompositeType>(Next), 0)) { + SubTypes.push_back(cast<CompositeType>(Next)); + Path.push_back(0); + Next = cast<CompositeType>(Next)->getTypeAtIndex(0U); + } + + // If there's no Path now, Next was originally scalar already (or empty + // leaf). We're done. + if (Path.empty()) + return true; + + // Otherwise, use normal iteration to keep looking through the tree until we + // find a non-aggregate type. + while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + } + + return true; +} + +/// Set the iterator data-structures to the next non-empty, non-aggregate +/// subtype. +static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + do { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + + assert(!Path.empty() && "found a leaf but didn't set the path?"); + } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + + return true; +} + + +/// Test if the given instruction is in a position to be optimized +/// with a tail-call. This roughly means that it's in a block with +/// a return and there's nothing that needs to be scheduled +/// between it and the return. +/// +/// This function only tests target-independent requirements. +bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) { + const Instruction *I = CS.getInstruction(); + const BasicBlock *ExitBB = I->getParent(); + const TerminatorInst *Term = ExitBB->getTerminator(); + const ReturnInst *Ret = dyn_cast<ReturnInst>(Term); + + // The block must end in a return statement or unreachable. + // + // FIXME: Decline tailcall if it's not guaranteed and if the block ends in + // an unreachable, for now. The way tailcall optimization is currently + // implemented means it will add an epilogue followed by a jump. That is + // not profitable. Also, if the callee is a special function (e.g. + // longjmp on x86), it can end up causing miscompilation that has not + // been fully understood. + if (!Ret && + (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) + return false; + + // If I will have a chain, make sure no other instruction that will have a + // chain interposes between I and the return. 
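+  // Editorial example (not part of the upstream source): in source such as
+  //
+  //   int f() { int r = g(); ++counter; return r; }
+  //
+  // the increment sits between the call and the return, is found by the scan
+  // below, and disqualifies the tail call.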
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory() || + !isSafeToSpeculativelyExecute(I)) + for (BasicBlock::const_iterator BBI = std::prev(ExitBB->end(), 2);; --BBI) { + if (&*BBI == I) + break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; + if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || + !isSafeToSpeculativelyExecute(&*BBI)) + return false; + } + + const Function *F = ExitBB->getParent(); + return returnTypeIsEligibleForTailCall( + F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering()); +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { + // If the block ends with a void return or unreachable, it doesn't matter + // what the call's return type is. + if (!Ret || Ret->getNumOperands() == 0) return true; + + // If the return value is undef, it doesn't matter what the call's + // return type is. + if (isa<UndefValue>(Ret->getOperand(0))) return true; + + // Make sure the attributes attached to each return are compatible. + AttrBuilder CallerAttrs(F->getAttributes(), + AttributeSet::ReturnIndex); + AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(), + AttributeSet::ReturnIndex); + + // Noalias is completely benign as far as calling convention goes, it + // shouldn't affect whether the call is a tail call. + CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + + bool AllowDifferingSizes = true; + if (CallerAttrs.contains(Attribute::ZExt)) { + if (!CalleeAttrs.contains(Attribute::ZExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::ZExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } else if (CallerAttrs.contains(Attribute::SExt)) { + if (!CalleeAttrs.contains(Attribute::SExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::SExt); + } + + // If they're still different, there's some facet we don't understand + // (currently only "inreg", but in future who knows). It may be OK but the + // only safe option is to reject the tail call. + if (CallerAttrs != CalleeAttrs) + return false; + + const Value *RetVal = Ret->getOperand(0), *CallVal = I; + SmallVector<unsigned, 4> RetPath, CallPath; + SmallVector<CompositeType *, 4> RetSubTypes, CallSubTypes; + + bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); + bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); + + // Nothing's actually returned, it doesn't matter what the callee put there + // it's a valid tail call. + if (RetEmpty) + return true; + + // Iterate pairwise through each of the value types making up the tail call + // and the corresponding return. For each one we want to know whether it's + // essentially going directly from the tail call to the ret, via operations + // that end up not generating any code. + // + // We allow a certain amount of covariance here. For example it's permitted + // for the tail call to define more bits than the ret actually cares about + // (e.g. via a truncate). + do { + if (CallEmpty) { + // We've exhausted the values produced by the tail call instruction, the + // rest are essentially undef. The type doesn't really matter, but we need + // *something*. 
+ Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + CallVal = UndefValue::get(SlotType); + } + + // The manipulations performed when we're looking through an insertvalue or + // an extractvalue would happen at the front of the RetPath list, so since + // we have to copy it anyway it's more efficient to create a reversed copy. + SmallVector<unsigned, 4> TmpRetPath(RetPath.rbegin(), RetPath.rend()); + SmallVector<unsigned, 4> TmpCallPath(CallPath.rbegin(), CallPath.rend()); + + // Finally, we can check whether the value produced by the tail call at this + // index is compatible with the value we return. + if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, + AllowDifferingSizes, TLI, + F->getParent()->getDataLayout())) + return false; + + CallEmpty = !nextRealType(CallSubTypes, CallPath); + } while(nextRealType(RetSubTypes, RetPath)); + + return true; +} + +bool llvm::canBeOmittedFromSymbolTable(const GlobalValue *GV) { + if (!GV->hasLinkOnceODRLinkage()) + return false; + + if (GV->hasUnnamedAddr()) + return true; + + // If it is a non constant variable, it needs to be uniqued across shared + // objects. + if (const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV)) { + if (!Var->isConstant()) + return false; + } + + // An alias can point to a variable. We could try to resolve the alias to + // decide, but for now just don't hide them. + if (isa<GlobalAlias>(GV)) + return false; + + GlobalStatus GS; + if (GlobalStatus::analyzeGlobal(GV, GS)) + return false; + + return !GS.IsCompared; +} + +static void collectFuncletMembers( + DenseMap<const MachineBasicBlock *, int> &FuncletMembership, int Funclet, + const MachineBasicBlock *MBB) { + // Add this MBB to our funclet. + auto P = FuncletMembership.insert(std::make_pair(MBB, Funclet)); + + // Don't revisit blocks. + if (!P.second) { + assert(P.first->second == Funclet && "MBB is part of two funclets!"); + return; + } + + bool IsReturn = false; + int NumTerminators = 0; + for (const MachineInstr &MI : MBB->terminators()) { + IsReturn |= MI.isReturn(); + ++NumTerminators; + } + assert((!IsReturn || NumTerminators == 1) && + "Expected only one terminator when a return is present!"); + + // Returns are boundaries where funclet transfer can occur, don't follow + // successors. + if (IsReturn) + return; + + for (const MachineBasicBlock *SMBB : MBB->successors()) + if (!SMBB->isEHPad()) + collectFuncletMembers(FuncletMembership, Funclet, SMBB); +} + +DenseMap<const MachineBasicBlock *, int> +llvm::getFuncletMembership(const MachineFunction &MF) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership; + + // We don't have anything to do if there aren't any EH pads. 
+ if (!MF.getMMI().hasEHFunclets()) + return FuncletMembership; + + int EntryBBNumber = MF.front().getNumber(); + bool IsSEH = isAsynchronousEHPersonality( + classifyEHPersonality(MF.getFunction()->getPersonalityFn())); + + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + SmallVector<const MachineBasicBlock *, 16> FuncletBlocks; + SmallVector<const MachineBasicBlock *, 16> UnreachableBlocks; + SmallVector<const MachineBasicBlock *, 16> SEHCatchPads; + SmallVector<std::pair<const MachineBasicBlock *, int>, 16> CatchRetSuccessors; + for (const MachineBasicBlock &MBB : MF) { + if (MBB.isEHFuncletEntry()) { + FuncletBlocks.push_back(&MBB); + } else if (IsSEH && MBB.isEHPad()) { + SEHCatchPads.push_back(&MBB); + } else if (MBB.pred_empty()) { + UnreachableBlocks.push_back(&MBB); + } + + MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator(); + // CatchPads are not funclets for SEH so do not consider CatchRet to + // transfer control to another funclet. + if (MBBI->getOpcode() != TII->getCatchReturnOpcode()) + continue; + + // FIXME: SEH CatchPads are not necessarily in the parent function: + // they could be inside a finally block. + const MachineBasicBlock *Successor = MBBI->getOperand(0).getMBB(); + const MachineBasicBlock *SuccessorColor = MBBI->getOperand(1).getMBB(); + CatchRetSuccessors.push_back( + {Successor, IsSEH ? EntryBBNumber : SuccessorColor->getNumber()}); + } + + // We don't have anything to do if there aren't any EH pads. + if (FuncletBlocks.empty()) + return FuncletMembership; + + // Identify all the basic blocks reachable from the function entry. + collectFuncletMembers(FuncletMembership, EntryBBNumber, &MF.front()); + // All blocks not part of a funclet are in the parent function. + for (const MachineBasicBlock *MBB : UnreachableBlocks) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Next, identify all the blocks inside the funclets. + for (const MachineBasicBlock *MBB : FuncletBlocks) + collectFuncletMembers(FuncletMembership, MBB->getNumber(), MBB); + // SEH CatchPads aren't really funclets, handle them separately. + for (const MachineBasicBlock *MBB : SEHCatchPads) + collectFuncletMembers(FuncletMembership, EntryBBNumber, MBB); + // Finally, identify all the targets of a catchret. + for (std::pair<const MachineBasicBlock *, int> CatchRetPair : + CatchRetSuccessors) + collectFuncletMembers(FuncletMembership, CatchRetPair.second, + CatchRetPair.first); + return FuncletMembership; +} diff --git a/contrib/llvm/lib/CodeGen/AntiDepBreaker.h b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h new file mode 100644 index 0000000..9f05200 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AntiDepBreaker.h @@ -0,0 +1,67 @@ +//=- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AntiDepBreaker class, which implements +// anti-dependence breaking heuristics for post-register-allocation scheduling. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_ANTIDEPBREAKER_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <vector> + +namespace llvm { + +/// This class works in conjunction with the post-RA scheduler to rename +/// registers to break register anti-dependencies (WAR hazards). +class LLVM_LIBRARY_VISIBILITY AntiDepBreaker { +public: + typedef std::vector<std::pair<MachineInstr *, MachineInstr *> > + DbgValueVector; + + virtual ~AntiDepBreaker(); + + /// Initialize anti-dep breaking for a new basic block. + virtual void StartBlock(MachineBasicBlock *BB) =0; + + /// Identifiy anti-dependencies within a basic-block region and break them by + /// renaming registers. Return the number of anti-dependencies broken. + virtual unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) = 0; + + /// Update liveness information to account for the current + /// instruction, which will not be scheduled. + virtual void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) =0; + + /// Finish anti-dep breaking for a basic block. + virtual void FinishBlock() =0; + + /// Update DBG_VALUE if dependency breaker is updating + /// other machine instruction to use NewReg. + void UpdateDbgValue(MachineInstr *MI, unsigned OldReg, unsigned NewReg) { + assert (MI->isDebugValue() && "MI is not DBG_VALUE!"); + if (MI && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() == OldReg) + MI->getOperand(0).setReg(NewReg); + } +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp new file mode 100644 index 0000000..ade2d71 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -0,0 +1,141 @@ +//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +ARMException::ARMException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {} + +ARMException::~ARMException() {} + +ARMTargetStreamer &ARMException::getTargetStreamer() { + MCTargetStreamer &TS = *Asm->OutStreamer->getTargetStreamer(); + return static_cast<ARMTargetStreamer &>(TS); +} + +/// endModule - Emit all exception information that should come after the +/// content. +void ARMException::endModule() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFISections(false, true); +} + +void ARMException::beginFunction(const MachineFunction *MF) { + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + getTargetStreamer().emitFnStart(); + // See if we need call frame info. + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + assert(MoveType != AsmPrinter::CFI_M_EH && + "non-EH CFI not yet supported in prologue with EHABI lowering"); + if (MoveType == AsmPrinter::CFI_M_Debug) { + shouldEmitCFI = true; + Asm->OutStreamer->EmitCFIStartProc(false); + } +} + +/// endFunction - Gather and emit post-function exception information. +/// +void ARMException::endFunction(const MachineFunction *MF) { + ARMTargetStreamer &ATS = getTargetStreamer(); + const Function *F = MF->getFunction(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + bool shouldEmitPersonality = forceEmitPersonality || + !MMI->getLandingPads().empty(); + if (!Asm->MF->getFunction()->needsUnwindTableEntry() && + !shouldEmitPersonality) + ATS.emitCantUnwind(); + else if (shouldEmitPersonality) { + // Emit references to personality. + if (Per) { + MCSymbol *PerSym = Asm->getSymbol(Per); + Asm->OutStreamer->EmitSymbolAttribute(PerSym, MCSA_Global); + ATS.emitPersonality(PerSym); + } + + // Emit .handlerdata directive. + ATS.emitHandlerData(); + + // Emit actual exception table + emitExceptionTable(); + } + + if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM) + ATS.emitFnEnd(); +} + +void ARMException::emitTypeInfos(unsigned TTypeEncoding) { + const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. 
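+  // Editorial note (not part of the upstream source): the TypeInfos are
+  // walked in reverse because LSDA type indices are 1-based and count
+  // backwards from the end of the emitted table, so the verbose labels below
+  // count down from TypeInfos.size().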
+ if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (const GlobalValue *GV : reverse(TypeInfos)) { + if (VerboseAsm) + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitTTypeReference((TypeID == 0 ? nullptr : TypeInfos[TypeID - 1]), + TTypeEncoding); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp new file mode 100644 index 0000000..8c68383 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp @@ -0,0 +1,45 @@ +//===-- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "AddressPool.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +class MCExpr; + +unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) { + HasBeenUsed = true; + auto IterBool = + Pool.insert(std::make_pair(Sym, AddressPoolEntry(Pool.size(), TLS))); + return IterBool.first->second.Number; +} + +// Emit addresses into the section given. +void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) { + if (Pool.empty()) + return; + + // Start the dwarf addr section. + Asm.OutStreamer->SwitchSection(AddrSection); + + // Order the address pool entries by ID + SmallVector<const MCExpr *, 64> Entries(Pool.size()); + + for (const auto &I : Pool) + Entries[I.second.Number] = + I.second.TLS + ? Asm.getObjFileLowering().getDebugThreadLocalSymbol(I.first) + : MCSymbolRefExpr::create(I.first, Asm.OutContext); + + for (const MCExpr *Entry : Entries) + Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h new file mode 100644 index 0000000..211fc98 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.h @@ -0,0 +1,52 @@ +//===-- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ADDRESSPOOL_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +class MCSection; +class MCSymbol; +class AsmPrinter; +// Collection of addresses for this unit and assorted labels. +// A Symbol->unsigned mapping of addresses used by indirect +// references. 
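+//
+// Editorial usage sketch (not part of the upstream header), assuming an
+// AsmPrinter `Asm` and an address-table section `AddrSection` are at hand:
+//
+//   AddressPool Pool;
+//   unsigned Idx = Pool.getIndex(Sym);  // stable index handed to DWARF forms
+//   ...
+//   Pool.emit(Asm, AddrSection);        // writes entries out in index order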
+class AddressPool { + struct AddressPoolEntry { + unsigned Number; + bool TLS; + AddressPoolEntry(unsigned Number, bool TLS) : Number(Number), TLS(TLS) {} + }; + DenseMap<const MCSymbol *, AddressPoolEntry> Pool; + + /// Record whether the AddressPool has been queried for an address index since + /// the last "resetUsedFlag" call. Used to implement type unit fallback - a + /// type that references addresses cannot be placed in a type unit when using + /// fission. + bool HasBeenUsed; + +public: + AddressPool() : HasBeenUsed(false) {} + + /// \brief Returns the index into the address pool with the given + /// label/symbol. + unsigned getIndex(const MCSymbol *Sym, bool TLS = false); + + void emit(AsmPrinter &Asm, MCSection *AddrSection); + + bool isEmpty() { return Pool.empty(); } + + bool hasBeenUsed() const { return HasBeenUsed; } + + void resetUsedFlag() { HasBeenUsed = false; } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp new file mode 100644 index 0000000..be7eafb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -0,0 +1,2626 @@ +//===-- AsmPrinter.cpp - Common AsmPrinter code ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "DwarfDebug.h" +#include "DwarfException.h" +#include "WinException.h" +#include "WinCodeViewLineTables.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/Timer.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "Debug Info Emission"; +static const char *const EHTimerName = "DWARF Exception Writer"; +static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables"; + +STATISTIC(EmittedInsts, "Number of machine instrs 
printed"); + +char AsmPrinter::ID = 0; + +typedef DenseMap<GCStrategy*, std::unique_ptr<GCMetadataPrinter>> gcp_map_type; +static gcp_map_type &getGCMap(void *&P) { + if (!P) + P = new gcp_map_type(); + return *(gcp_map_type*)P; +} + + +/// getGVAlignmentLog2 - Return the alignment to use for the specified global +/// value in log2 form. This rounds up to the preferred alignment if possible +/// and legal. +static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL, + unsigned InBits = 0) { + unsigned NumBits = 0; + if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) + NumBits = DL.getPreferredAlignmentLog(GVar); + + // If InBits is specified, round it to it. + if (InBits > NumBits) + NumBits = InBits; + + // If the GV has a specified alignment, take it into account. + if (GV->getAlignment() == 0) + return NumBits; + + unsigned GVAlign = Log2_32(GV->getAlignment()); + + // If the GVAlign is larger than NumBits, or if we are required to obey + // NumBits because the GV has an assigned section, obey it. + if (GVAlign > NumBits || GV->hasSection()) + NumBits = GVAlign; + return NumBits; +} + +AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer) + : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), + OutContext(Streamer->getContext()), OutStreamer(std::move(Streamer)), + LastMI(nullptr), LastFn(0), Counter(~0U) { + DD = nullptr; + MMI = nullptr; + LI = nullptr; + MF = nullptr; + CurExceptionSym = CurrentFnSym = CurrentFnSymForSize = nullptr; + CurrentFnBegin = nullptr; + CurrentFnEnd = nullptr; + GCMetadataPrinters = nullptr; + VerboseAsm = OutStreamer->isVerboseAsm(); +} + +AsmPrinter::~AsmPrinter() { + assert(!DD && Handlers.empty() && "Debug/EH info didn't get finalized"); + + if (GCMetadataPrinters) { + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + + delete &GCMap; + GCMetadataPrinters = nullptr; + } +} + +/// getFunctionNumber - Return a unique ID for the current function. +/// +unsigned AsmPrinter::getFunctionNumber() const { + return MF->getFunctionNumber(); +} + +const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { + return *TM.getObjFileLowering(); +} + +const DataLayout &AsmPrinter::getDataLayout() const { + return MMI->getModule()->getDataLayout(); +} + +// Do not use the cached DataLayout because some client use it without a Module +// (llmv-dsymutil, llvm-dwarfdump). +unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); } + +const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const { + assert(MF && "getSubtargetInfo requires a valid MachineFunction!"); + return MF->getSubtarget<MCSubtargetInfo>(); +} + +void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) { + S.EmitInstruction(Inst, getSubtargetInfo()); +} + +StringRef AsmPrinter::getTargetTriple() const { + return TM.getTargetTriple().str(); +} + +/// getCurrentSection() - Return the current section we are emitting to. +const MCSection *AsmPrinter::getCurrentSection() const { + return OutStreamer->getCurrentSection().first; +} + + + +void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); + if (isVerbose()) + AU.addRequired<MachineLoopInfo>(); +} + +bool AsmPrinter::doInitialization(Module &M) { + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + + // Initialize TargetLoweringObjectFile. 
+ const_cast<TargetLoweringObjectFile&>(getObjFileLowering()) + .Initialize(OutContext, TM); + + OutStreamer->InitSections(false); + + Mang = new Mangler(); + + // Emit the version-min deplyment target directive if needed. + // + // FIXME: If we end up with a collection of these sorts of Darwin-specific + // or ELF-specific things, it may make sense to have a platform helper class + // that will work with the target helper class. For now keep it here, as the + // alternative is duplicated code in each of the target asm printers that + // use the directive, where it would need the same conditionalization + // anyway. + Triple TT(getTargetTriple()); + if (TT.isOSDarwin()) { + unsigned Major, Minor, Update; + TT.getOSVersion(Major, Minor, Update); + // If there is a version specified, Major will be non-zero. + if (Major) { + MCVersionMinType VersionType; + if (TT.isWatchOS()) + VersionType = MCVM_WatchOSVersionMin; + else if (TT.isTvOS()) + VersionType = MCVM_TvOSVersionMin; + else if (TT.isMacOSX()) + VersionType = MCVM_OSXVersionMin; + else + VersionType = MCVM_IOSVersionMin; + OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update); + } + } + + // Allow the target to emit any magic that it wants at the start of the file. + EmitStartOfAsmFile(M); + + // Very minimal debug info. It is ignored if we emit actual debug info. If we + // don't, this at least helps the user find where a global came from. + if (MAI->hasSingleParameterDotFile()) { + // .file "foo.c" + OutStreamer->EmitFileDirective(M.getModuleIdentifier()); + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (auto &I : *MI) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*I)) + MP->beginAssembly(M, *MI, *this); + + // Emit module-level inline asm if it exists. + if (!M.getModuleInlineAsm().empty()) { + // We're at the module level. Construct MCSubtarget from the default CPU + // and target triple. 
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo( + TM.getTargetTriple().str(), TM.getTargetCPU(), + TM.getTargetFeatureString())); + OutStreamer->AddComment("Start of file scope inline assembly"); + OutStreamer->AddBlankLine(); + EmitInlineAsm(M.getModuleInlineAsm()+"\n", + OutContext.getSubtargetCopy(*STI), TM.Options.MCOptions); + OutStreamer->AddComment("End of file scope inline assembly"); + OutStreamer->AddBlankLine(); + } + + if (MAI->doesSupportDebugInformation()) { + bool EmitCodeView = MMI->getModule()->getCodeViewFlag(); + if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) { + Handlers.push_back(HandlerInfo(new WinCodeViewLineTables(this), + DbgTimerName, + CodeViewLineTablesGroupName)); + } + if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) { + DD = new DwarfDebug(this, &M); + Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName)); + } + } + + EHStreamer *ES = nullptr; + switch (MAI->getExceptionHandlingType()) { + case ExceptionHandling::None: + break; + case ExceptionHandling::SjLj: + case ExceptionHandling::DwarfCFI: + ES = new DwarfCFIException(this); + break; + case ExceptionHandling::ARM: + ES = new ARMException(this); + break; + case ExceptionHandling::WinEH: + switch (MAI->getWinEHEncodingType()) { + default: llvm_unreachable("unsupported unwinding information encoding"); + case WinEH::EncodingType::Invalid: + break; + case WinEH::EncodingType::X86: + case WinEH::EncodingType::Itanium: + ES = new WinException(this); + break; + } + break; + } + if (ES) + Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName)); + return false; +} + +static bool canBeHidden(const GlobalValue *GV, const MCAsmInfo &MAI) { + if (!MAI.hasWeakDefCanBeHiddenDirective()) + return false; + + return canBeOmittedFromSymbolTable(GV); +} + +void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + switch (Linkage) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + if (MAI->hasWeakDefDirective()) { + // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + + if (!canBeHidden(GV, *MAI)) + // .weak_definition _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); + else + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_WeakDefAutoPrivate); + } else if (MAI->hasLinkOnceDirective()) { + // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + //NOTE: linkonce is handled by the section the symbol was assigned to. + } else { + // .weak _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak); + } + return; + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol. 
+ // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + return; + case GlobalValue::PrivateLinkage: + case GlobalValue::InternalLinkage: + return; + case GlobalValue::AvailableExternallyLinkage: + llvm_unreachable("Should never emit this"); + case GlobalValue::ExternalWeakLinkage: + llvm_unreachable("Don't know how to emit these"); + } + llvm_unreachable("Unknown linkage type!"); +} + +void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name, + const GlobalValue *GV) const { + TM.getNameWithPrefix(Name, GV, *Mang); +} + +MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { + return TM.getSymbol(GV, *Mang); +} + +static MCSymbol *getOrCreateEmuTLSControlSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_v.") + GVSym->getName()); +} + +static MCSymbol *getOrCreateEmuTLSInitSym(MCSymbol *GVSym, MCContext &C) { + return C.getOrCreateSymbol(Twine("__emutls_t.") + GVSym->getName()); +} + +/// EmitEmulatedTLSControlVariable - Emit the control variable for an emulated TLS variable. +void AsmPrinter::EmitEmulatedTLSControlVariable(const GlobalVariable *GV, + MCSymbol *EmittedSym, + bool AllZeroInitValue) { + MCSection *TLSVarSection = getObjFileLowering().getDataSection(); + OutStreamer->SwitchSection(TLSVarSection); + MCSymbol *GVSym = getSymbol(GV); + EmitLinkage(GV, EmittedSym); // same linkage as GV + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + unsigned WordSize = DL.getPointerSize(); + unsigned Alignment = DL.getPointerABIAlignment(); + EmitAlignment(Log2_32(Alignment)); + OutStreamer->EmitLabel(EmittedSym); + OutStreamer->EmitIntValue(Size, WordSize); + OutStreamer->EmitIntValue((1 << AlignLog), WordSize); + OutStreamer->EmitIntValue(0, WordSize); + if (GV->hasInitializer() && !AllZeroInitValue) { + OutStreamer->EmitSymbolValue( + getOrCreateEmuTLSInitSym(GVSym, OutContext), WordSize); + } else + OutStreamer->EmitIntValue(0, WordSize); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedSym), + MCConstantExpr::create(4 * WordSize, OutContext)); + OutStreamer->AddBlankLine(); // End of the __emutls_v.* variable. +} + +/// EmitGlobalVariable - Emit the specified global variable to the .s file. +void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { + bool IsEmuTLSVar = + GV->getThreadLocalMode() != llvm::GlobalVariable::NotThreadLocal && + TM.Options.EmulatedTLS; + assert(!(IsEmuTLSVar && GV->hasCommonLinkage()) && + "No emulated TLS variables in the common section"); + + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GV)) + return; + + // Skip the emission of global equivalents. The symbol can be emitted later + // on by emitGlobalGOTEquivs in case it turns out to be needed. + if (GlobalGOTEquivs.count(getSymbol(GV))) + return; + + if (isVerbose() && !IsEmuTLSVar) { + // When printing the control variable __emutls_v.*, + // we don't need to print the original TLS variable name. + GV->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, GV->getParent()); + OutStreamer->GetCommentOS() << '\n'; + } + } + + MCSymbol *GVSym = getSymbol(GV); + MCSymbol *EmittedSym = IsEmuTLSVar ? + getOrCreateEmuTLSControlSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSControlSym only creates the symbol with name and default attributes. 
+ // GV's or GVSym's attributes will be used for the EmittedSym. + + EmitVisibility(EmittedSym, GV->getVisibility(), !GV->isDeclaration()); + + if (!GV->hasInitializer()) // External globals require no extra code. + return; + + GVSym->redefineIfPossible(); + if (GVSym->isDefined() || GVSym->isVariable()) + report_fatal_error("symbol '" + Twine(GVSym->getName()) + + "' is already defined"); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->EmitSymbolAttribute(EmittedSym, MCSA_ELF_TypeObject); + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + + const DataLayout &DL = GV->getParent()->getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, DL); + + bool AllZeroInitValue = false; + const Constant *InitValue = GV->getInitializer(); + if (isa<ConstantAggregateZero>(InitValue)) + AllZeroInitValue = true; + else { + const ConstantInt *InitIntValue = dyn_cast<ConstantInt>(InitValue); + if (InitIntValue && InitIntValue->isZero()) + AllZeroInitValue = true; + } + if (IsEmuTLSVar) + EmitEmulatedTLSControlVariable(GV, EmittedSym, AllZeroInitValue); + + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->setSymbolSize(GVSym, Size); + } + + // Handle common and BSS local symbols (.lcomm). + if (GVKind.isCommon() || GVKind.isBSSLocal()) { + assert(!(IsEmuTLSVar && GVKind.isCommon()) && + "No emulated TLS variables in the common section"); + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + unsigned Align = 1 << AlignLog; + + // Handle common symbols. + if (GVKind.isCommon()) { + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; + + // .comm _foo, 42, 4 + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + return; + } + + // Handle local BSS symbols. + if (MAI->hasMachoZeroFillDirective()) { + MCSection *TheSection = + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + // .zerofill __DATA, __bss, _foo, 400, 5 + OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align); + return; + } + + // Use .lcomm only if it supports user-specified alignment. + // Otherwise, while it would still be correct to use .lcomm in some + // cases (e.g. when Align == 1), the external assembler might enfore + // some -unknown- default alignment behavior, which could cause + // spurious differences between external and integrated assembler. + // Prefer to simply fall back to .local / .comm in this case. + if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { + // .lcomm _foo, 42 + OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align); + return; + } + + if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) + Align = 0; + + // .local _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local); + // .comm _foo, 42, 4 + OutStreamer->EmitCommonSymbol(GVSym, Size, Align); + return; + } + + if (IsEmuTLSVar && AllZeroInitValue) + return; // No need of initialization values. + + MCSymbol *EmittedInitSym = IsEmuTLSVar ? + getOrCreateEmuTLSInitSym(GVSym, OutContext) : GVSym; + // getOrCreateEmuTLSInitSym only creates the symbol with name and default attributes. + // GV's or GVSym's attributes will be used for the EmittedInitSym. 
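+  // Editorial note (not part of the upstream source): for emulated TLS the
+  // control variable emitted above has the shape
+  //
+  //   __emutls_v.x = { size, 1 << align, 0 /* runtime slot */,
+  //                    &__emutls_t.x or 0 }
+  //
+  // and __emutls_t.x, emitted below when a non-zero initializer exists, holds
+  // the variable's initial image.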
+ + MCSection *TheSection = IsEmuTLSVar ? + getObjFileLowering().getReadOnlySection() : + getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM); + + // Handle the zerofill directive on darwin, which is a special form of BSS + // emission. + if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective() && !IsEmuTLSVar) { + if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. + + // .globl _foo + OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global); + // .zerofill __DATA, __common, _foo, 400, 5 + OutStreamer->EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); + return; + } + + // Handle thread local data for mach-o which requires us to output an + // additional structure of data and mangle the original symbol so that we + // can reference it later. + // + // TODO: This should become an "emit thread local global" method on TLOF. + // All of this macho specific stuff should be sunk down into TLOFMachO and + // stuff like "TLSExtraDataSection" should no longer be part of the parent + // TLOF class. This will also make it more obvious that stuff like + // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho + // specific code. + if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective() && !IsEmuTLSVar) { + // Emit the .tbss symbol + MCSymbol *MangSym = + OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); + + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); + OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); + } else if (GVKind.isThreadData()) { + OutStreamer->SwitchSection(TheSection); + + EmitAlignment(AlignLog, GV); + OutStreamer->EmitLabel(MangSym); + + EmitGlobalConstant(GV->getParent()->getDataLayout(), + GV->getInitializer()); + } + + OutStreamer->AddBlankLine(); + + // Emit the variable struct for the runtime. + MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); + + OutStreamer->SwitchSection(TLVSect); + // Emit the linkage here. + EmitLinkage(GV, GVSym); + OutStreamer->EmitLabel(GVSym); + + // Three pointers in size: + // - __tlv_bootstrap - used to make sure support exists + // - spare pointer, used when mapped by the runtime + // - pointer to mangled symbol above with initializer + unsigned PtrSize = DL.getPointerTypeSize(GV->getType()); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), + PtrSize); + OutStreamer->EmitIntValue(0, PtrSize); + OutStreamer->EmitSymbolValue(MangSym, PtrSize); + + OutStreamer->AddBlankLine(); + return; + } + + OutStreamer->SwitchSection(TheSection); + + // emutls_t.* symbols are only used in the current compilation unit. + if (!IsEmuTLSVar) + EmitLinkage(GV, EmittedInitSym); + EmitAlignment(AlignLog, GV); + + OutStreamer->EmitLabel(EmittedInitSym); + + EmitGlobalConstant(GV->getParent()->getDataLayout(), GV->getInitializer()); + + if (MAI->hasDotTypeDotSizeDirective()) + // .size foo, 42 + OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym), + MCConstantExpr::create(Size, OutContext)); + + OutStreamer->AddBlankLine(); +} + +/// EmitFunctionHeader - This method emits the header for the current +/// function. +void AsmPrinter::EmitFunctionHeader() { + // Print out constants referenced by the function + EmitConstantPool(); + + // Print the 'header' of function. 
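+  // Editorial sketch (not part of the upstream source): on a typical ELF
+  // target the rest of this function amounts to directives along the lines of
+  //
+  //   .text
+  //   .globl   foo
+  //   .p2align 4
+  //   .type    foo,@function
+  // foo:
+  //
+  // interleaved with any prefix/prologue data and the beginFunction callbacks
+  // of the attached debug/EH handlers.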
+ const Function *F = MF->getFunction(); + + OutStreamer->SwitchSection( + getObjFileLowering().SectionForGlobal(F, *Mang, TM)); + EmitVisibility(CurrentFnSym, F->getVisibility()); + + EmitLinkage(F, CurrentFnSym); + if (MAI->hasFunctionAlignment()) + EmitAlignment(MF->getAlignment(), F); + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction); + + if (isVerbose()) { + F->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, F->getParent()); + OutStreamer->GetCommentOS() << '\n'; + } + + // Emit the prefix data. + if (F->hasPrefixData()) + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrefixData()); + + // Emit the CurrentFnSym. This is a virtual function to allow targets to + // do their wild and crazy things as required. + EmitFunctionEntryLabel(); + + // If the function had address-taken blocks that got deleted, then we have + // references to the dangling symbols. Emit them at the start of the function + // so that we don't get references to undefined symbols. + std::vector<MCSymbol*> DeadBlockSyms; + MMI->takeDeletedSymbolsForFunction(F, DeadBlockSyms); + for (unsigned i = 0, e = DeadBlockSyms.size(); i != e; ++i) { + OutStreamer->AddComment("Address taken block that was later removed"); + OutStreamer->EmitLabel(DeadBlockSyms[i]); + } + + if (CurrentFnBegin) { + if (MAI->useAssignmentForEHBegin()) { + MCSymbol *CurPos = OutContext.createTempSymbol(); + OutStreamer->EmitLabel(CurPos); + OutStreamer->EmitAssignment(CurrentFnBegin, + MCSymbolRefExpr::create(CurPos, OutContext)); + } else { + OutStreamer->EmitLabel(CurrentFnBegin); + } + } + + // Emit pre-function debug and/or EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->beginFunction(MF); + } + + // Emit the prologue data. + if (F->hasPrologueData()) + EmitGlobalConstant(F->getParent()->getDataLayout(), F->getPrologueData()); +} + +/// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the +/// function. This can be overridden by targets as required to do custom stuff. +void AsmPrinter::EmitFunctionEntryLabel() { + CurrentFnSym->redefineIfPossible(); + + // The function label could have already been emitted if two symbols end up + // conflicting due to asm renaming. Detect this and emit an error. + if (CurrentFnSym->isVariable()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' is a protected alias"); + if (CurrentFnSym->isDefined()) + report_fatal_error("'" + Twine(CurrentFnSym->getName()) + + "' label emitted multiple times to assembly file"); + + return OutStreamer->EmitLabel(CurrentFnSym); +} + +/// emitComments - Pretty-print comments for instructions. +static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + // Check for spills and reloads + int FI; + + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); + + // We assume a single instruction only has a spill or reload, not + // both. 
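+  // Editorial note (not part of the upstream source): depending on which of
+  // the stack-slot queries below matches, the emitted comment reads e.g.
+  // "8-byte Spill", "8-byte Reload", or their "Folded" variants.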
+ const MachineMemOperand *MMO; + if (TII->isLoadFromStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Reload\n"; + } + } else if (TII->hasLoadFromStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Reload\n"; + } else if (TII->isStoreToStackSlotPostFE(&MI, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) { + MMO = *MI.memoperands_begin(); + CommentOS << MMO->getSize() << "-byte Spill\n"; + } + } else if (TII->hasStoreToStackSlot(&MI, MMO, FI)) { + if (FrameInfo->isSpillSlotObjectIndex(FI)) + CommentOS << MMO->getSize() << "-byte Folded Spill\n"; + } + + // Check for spill-induced copies + if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) + CommentOS << " Reload Reuse\n"; +} + +/// emitImplicitDef - This method emits the specified machine instruction +/// that is an implicit def. +void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { + unsigned RegNo = MI->getOperand(0).getReg(); + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "implicit-def: " + << PrintReg(RegNo, MF->getSubtarget().getRegisterInfo()); + + OutStreamer->AddComment(OS.str()); + OutStreamer->AddBlankLine(); +} + +static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { + std::string Str; + raw_string_ostream OS(Str); + OS << "kill:"; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + assert(Op.isReg() && "KILL instruction must have only register operands"); + OS << ' ' + << PrintReg(Op.getReg(), + AP.MF->getSubtarget().getRegisterInfo()) + << (Op.isDef() ? "<def>" : "<kill>"); + } + AP.OutStreamer->AddComment(Str); + AP.OutStreamer->AddBlankLine(); +} + +/// emitDebugValueComment - This method handles the target-independent form +/// of DBG_VALUE, returning true if it was able to do so. A false return +/// means the target will need to handle MI in EmitInstruction. +static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { + // This code handles only the 4-operand target-independent form. + if (MI->getNumOperands() != 4) + return false; + + SmallString<128> Str; + raw_svector_ostream OS(Str); + OS << "DEBUG_VALUE: "; + + const DILocalVariable *V = MI->getDebugVariable(); + if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) { + StringRef Name = SP->getDisplayName(); + if (!Name.empty()) + OS << Name << ":"; + } + OS << V->getName(); + + const DIExpression *Expr = MI->getDebugExpression(); + if (Expr->isBitPiece()) + OS << " [bit_piece offset=" << Expr->getBitPieceOffset() + << " size=" << Expr->getBitPieceSize() << "]"; + OS << " <- "; + + // The second operand is only an offset if it's an immediate. + bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + + for (unsigned i = 0; i < Expr->getNumElements(); ++i) { + if (Deref) { + // We currently don't support extra Offsets or derefs after the first + // one. 
Bail out early instead of emitting an incorrect comment + OS << " [complex expression]"; + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + uint64_t Op = Expr->getElement(i); + if (Op == dwarf::DW_OP_deref) { + Deref = true; + continue; + } else if (Op == dwarf::DW_OP_bit_piece) { + // There can't be any operands after this in a valid expression + break; + } + uint64_t ExtraOffset = Expr->getElement(i++); + if (Op == dwarf::DW_OP_plus) + Offset += ExtraOffset; + else { + assert(Op == dwarf::DW_OP_minus); + Offset -= ExtraOffset; + } + } + + // Register or immediate value. Register 0 means undef. + if (MI->getOperand(0).isFPImm()) { + APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); + if (MI->getOperand(0).getFPImm()->getType()->isFloatTy()) { + OS << (double)APF.convertToFloat(); + } else if (MI->getOperand(0).getFPImm()->getType()->isDoubleTy()) { + OS << APF.convertToDouble(); + } else { + // There is no good way to print long double. Convert a copy to + // double. Ah well, it's only a comment. + bool ignored; + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + OS << "(long double) " << APF.convertToDouble(); + } + } else if (MI->getOperand(0).isImm()) { + OS << MI->getOperand(0).getImm(); + } else if (MI->getOperand(0).isCImm()) { + MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); + } else { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.MF->getSubtarget().getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + Deref = true; + } + if (Reg == 0) { + // Suppress offset, it is not meaningful here. + OS << "undef"; + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); + return true; + } + if (Deref) + OS << '['; + OS << PrintReg(Reg, AP.MF->getSubtarget().getRegisterInfo()); + } + + if (Deref) + OS << '+' << Offset << ']'; + + // NOTE: Want this comment at start of line, don't emit with AddComment. + AP.OutStreamer->emitRawComment(OS.str()); + return true; +} + +AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { + if (MAI->getExceptionHandlingType() == ExceptionHandling::DwarfCFI && + MF->getFunction()->needsUnwindTableEntry()) + return CFI_M_EH; + + if (MMI->hasDebugInfo()) + return CFI_M_Debug; + + return CFI_M_None; +} + +bool AsmPrinter::needsSEHMoves() { + return MAI->usesWindowsCFI() && MF->getFunction()->needsUnwindTableEntry(); +} + +void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { + ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); + if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && + ExceptionHandlingType != ExceptionHandling::ARM) + return; + + if (needsCFIMoves() == CFI_M_None) + return; + + const MachineModuleInfo &MMI = MF->getMMI(); + const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions(); + unsigned CFIIndex = MI.getOperand(0).getCFIIndex(); + const MCCFIInstruction &CFI = Instrs[CFIIndex]; + emitCFIInstruction(CFI); +} + +void AsmPrinter::emitFrameAlloc(const MachineInstr &MI) { + // The operands are the MCSymbol and the frame offset of the allocation. + MCSymbol *FrameAllocSym = MI.getOperand(0).getMCSymbol(); + int FrameOffset = MI.getOperand(1).getImm(); + + // Emit a symbol assignment. 
+ OutStreamer->EmitAssignment(FrameAllocSym, + MCConstantExpr::create(FrameOffset, OutContext)); +} + +/// EmitFunctionBody - This method emits the body and trailer for a +/// function. +void AsmPrinter::EmitFunctionBody() { + EmitFunctionHeader(); + + // Emit target-specific gunk before the function body. + EmitFunctionBodyStart(); + + bool ShouldPrintDebugScopes = MMI->hasDebugInfo(); + + // Print out code for the function. + bool HasAnyRealCode = false; + for (auto &MBB : *MF) { + // Print a label for the basic block. + EmitBasicBlockStart(MBB); + for (auto &MI : MBB) { + + // Print the assembly for the instruction. + if (!MI.isPosition() && !MI.isImplicitDef() && !MI.isKill() && + !MI.isDebugValue()) { + HasAnyRealCode = true; + ++EmittedInsts; + } + + if (ShouldPrintDebugScopes) { + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->beginInstruction(&MI); + } + } + + if (isVerbose()) + emitComments(MI, OutStreamer->GetCommentOS()); + + switch (MI.getOpcode()) { + case TargetOpcode::CFI_INSTRUCTION: + emitCFIInstruction(MI); + break; + + case TargetOpcode::LOCAL_ESCAPE: + emitFrameAlloc(MI); + break; + + case TargetOpcode::EH_LABEL: + case TargetOpcode::GC_LABEL: + OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol()); + break; + case TargetOpcode::INLINEASM: + EmitInlineAsm(&MI); + break; + case TargetOpcode::DBG_VALUE: + if (isVerbose()) { + if (!emitDebugValueComment(&MI, *this)) + EmitInstruction(&MI); + } + break; + case TargetOpcode::IMPLICIT_DEF: + if (isVerbose()) emitImplicitDef(&MI); + break; + case TargetOpcode::KILL: + if (isVerbose()) emitKill(&MI, *this); + break; + default: + EmitInstruction(&MI); + break; + } + + if (ShouldPrintDebugScopes) { + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->endInstruction(); + } + } + } + + EmitBasicBlockEnd(MBB); + } + + // If the function is empty and the object file uses .subsections_via_symbols, + // then we need to emit *something* to the function body to prevent the + // labels from collapsing together. Just emit a noop. + if ((MAI->hasSubsectionsViaSymbols() && !HasAnyRealCode)) { + MCInst Noop; + MF->getSubtarget().getInstrInfo()->getNoopForMachoTarget(Noop); + OutStreamer->AddComment("avoids zero-length function"); + + // Targets can opt-out of emitting the noop here by leaving the opcode + // unspecified. + if (Noop.getOpcode()) + OutStreamer->EmitInstruction(Noop, getSubtargetInfo()); + } + + const Function *F = MF->getFunction(); + for (const auto &BB : *F) { + if (!BB.hasAddressTaken()) + continue; + MCSymbol *Sym = GetBlockAddressSymbol(&BB); + if (Sym->isDefined()) + continue; + OutStreamer->AddComment("Address of block that was removed by CodeGen"); + OutStreamer->EmitLabel(Sym); + } + + // Emit target-specific gunk after the function body. + EmitFunctionBodyEnd(); + + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) { + // Create a symbol for the end of function. + CurrentFnEnd = createTempSymbol("func_end"); + OutStreamer->EmitLabel(CurrentFnEnd); + } + + // If the target wants a .size directive for the size of the function, emit + // it. + if (MAI->hasDotTypeDotSizeDirective()) { + // We can get the size as difference between the function label and the + // temp label. 
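+    // Illustrative example (symbol names are assumptions): for a function
+    // foo this typically lowers on ELF to
+    //   .size foo, .Lfunc_end0-foo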
+ const MCExpr *SizeExp = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(CurrentFnEnd, OutContext), + MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext); + if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym)) + OutStreamer->emitELFSize(Sym, SizeExp); + } + + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->markFunctionEnd(); + } + + // Print out jump tables referenced by the function. + EmitJumpTableInfo(); + + // Emit post-function debug and/or EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled); + HI.Handler->endFunction(MF); + } + MMI->EndFunction(); + + OutStreamer->AddBlankLine(); +} + +/// \brief Compute the number of Global Variables that uses a Constant. +static unsigned getNumGlobalVariableUses(const Constant *C) { + if (!C) + return 0; + + if (isa<GlobalVariable>(C)) + return 1; + + unsigned NumUses = 0; + for (auto *CU : C->users()) + NumUses += getNumGlobalVariableUses(dyn_cast<Constant>(CU)); + + return NumUses; +} + +/// \brief Only consider global GOT equivalents if at least one user is a +/// cstexpr inside an initializer of another global variables. Also, don't +/// handle cstexpr inside instructions. During global variable emission, +/// candidates are skipped and are emitted later in case at least one cstexpr +/// isn't replaced by a PC relative GOT entry access. +static bool isGOTEquivalentCandidate(const GlobalVariable *GV, + unsigned &NumGOTEquivUsers) { + // Global GOT equivalents are unnamed private globals with a constant + // pointer initializer to another global symbol. They must point to a + // GlobalVariable or Function, i.e., as GlobalValue. + if (!GV->hasUnnamedAddr() || !GV->hasInitializer() || !GV->isConstant() || + !GV->isDiscardableIfUnused() || !dyn_cast<GlobalValue>(GV->getOperand(0))) + return false; + + // To be a got equivalent, at least one of its users need to be a constant + // expression used by another global variable. + for (auto *U : GV->users()) + NumGOTEquivUsers += getNumGlobalVariableUses(dyn_cast<Constant>(U)); + + return NumGOTEquivUsers > 0; +} + +/// \brief Unnamed constant global variables solely contaning a pointer to +/// another globals variable is equivalent to a GOT table entry; it contains the +/// the address of another symbol. Optimize it and replace accesses to these +/// "GOT equivalents" by using the GOT entry for the final global instead. +/// Compute GOT equivalent candidates among all global variables to avoid +/// emitting them if possible later on, after it use is replaced by a GOT entry +/// access. +void AsmPrinter::computeGlobalGOTEquivs(Module &M) { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + for (const auto &G : M.globals()) { + unsigned NumGOTEquivUsers = 0; + if (!isGOTEquivalentCandidate(&G, NumGOTEquivUsers)) + continue; + + const MCSymbol *GOTEquivSym = getSymbol(&G); + GlobalGOTEquivs[GOTEquivSym] = std::make_pair(&G, NumGOTEquivUsers); + } +} + +/// \brief Constant expressions using GOT equivalent globals may not be eligible +/// for PC relative GOT entry conversion, in such cases we need to emit such +/// globals we previously omitted in EmitGlobalVariable. 
+void AsmPrinter::emitGlobalGOTEquivs() { + if (!getObjFileLowering().supportIndirectSymViaGOTPCRel()) + return; + + SmallVector<const GlobalVariable *, 8> FailedCandidates; + for (auto &I : GlobalGOTEquivs) { + const GlobalVariable *GV = I.second.first; + unsigned Cnt = I.second.second; + if (Cnt) + FailedCandidates.push_back(GV); + } + GlobalGOTEquivs.clear(); + + for (auto *GV : FailedCandidates) + EmitGlobalVariable(GV); +} + +bool AsmPrinter::doFinalization(Module &M) { + // Set the MachineFunction to nullptr so that we can catch attempted + // accesses to MF specific features at the module level and so that + // we can conditionalize accesses based on whether or not it is nullptr. + MF = nullptr; + + // Gather all GOT equivalent globals in the module. We really need two + // passes over the globals: one to compute and another to avoid its emission + // in EmitGlobalVariable, otherwise we would not be able to handle cases + // where the got equivalent shows up before its use. + computeGlobalGOTEquivs(M); + + // Emit global variables. + for (const auto &G : M.globals()) + EmitGlobalVariable(&G); + + // Emit remaining GOT equivalent globals. + emitGlobalGOTEquivs(); + + // Emit visibility info for declarations + for (const Function &F : M) { + if (!F.isDeclarationForLinker()) + continue; + GlobalValue::VisibilityTypes V = F.getVisibility(); + if (V == GlobalValue::DefaultVisibility) + continue; + + MCSymbol *Name = getSymbol(&F); + EmitVisibility(Name, V, false); + } + + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + // Emit module flags. + SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; + M.getModuleFlagsMetadata(ModuleFlags); + if (!ModuleFlags.empty()) + TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM); + + if (TM.getTargetTriple().isOSBinFormatELF()) { + MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + // Output stubs for external and common global variables. + MachineModuleInfoELF::SymbolListTy Stubs = MMIELF.GetGVStubList(); + if (!Stubs.empty()) { + OutStreamer->SwitchSection(TLOF.getDataSection()); + const DataLayout &DL = M.getDataLayout(); + + for (const auto &Stub : Stubs) { + OutStreamer->EmitLabel(Stub.first); + OutStreamer->EmitSymbolValue(Stub.second.getPointer(), + DL.getPointerSize()); + } + } + } + + // Finalize debug and EH information. + for (const HandlerInfo &HI : Handlers) { + NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, + TimePassesIsEnabled); + HI.Handler->endModule(); + delete HI.Handler; + } + Handlers.clear(); + DD = nullptr; + + // If the target wants to know about weak references, print them all. + if (MAI->getWeakRefDirective()) { + // FIXME: This is not lazy, it would be nice to only print weak references + // to stuff that is actually used. Note that doing so would require targets + // to notice uses in operands (due to constant exprs etc). This should + // happen with the MC stuff eventually. + + // Print out module-level global variables here. 
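+    // Illustrative example: an extern_weak global bar typically becomes
+    // ".weak bar" or ".weak_reference bar", depending on the target's
+    // weak-reference directive.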
+ for (const auto &G : M.globals()) { + if (!G.hasExternalWeakLinkage()) + continue; + OutStreamer->EmitSymbolAttribute(getSymbol(&G), MCSA_WeakReference); + } + + for (const auto &F : M) { + if (!F.hasExternalWeakLinkage()) + continue; + OutStreamer->EmitSymbolAttribute(getSymbol(&F), MCSA_WeakReference); + } + } + + OutStreamer->AddBlankLine(); + for (const auto &Alias : M.aliases()) { + MCSymbol *Name = getSymbol(&Alias); + + if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_Global); + else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_WeakReference); + else + assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. + if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + + EmitVisibility(Name, Alias.getVisibility()); + + // Emit the directives as assignments aka .set: + OutStreamer->EmitAssignment(Name, lowerConstant(Alias.getAliasee())); + + // If the aliasee does not correspond to a symbol in the output, i.e. the + // alias is not of an object or the aliased object is private, then set the + // size of the alias symbol from the type of the alias. We don't do this in + // other situations as the alias and aliasee having differing types but same + // size may be intentional. + const GlobalObject *BaseObject = Alias.getBaseObject(); + if (MAI->hasDotTypeDotSizeDirective() && Alias.getValueType()->isSized() && + (!BaseObject || BaseObject->hasPrivateLinkage())) { + const DataLayout &DL = M.getDataLayout(); + uint64_t Size = DL.getTypeAllocSize(Alias.getValueType()); + OutStreamer->emitELFSize(cast<MCSymbolELF>(Name), + MCConstantExpr::create(Size, OutContext)); + } + } + + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "AsmPrinter didn't require GCModuleInfo?"); + for (GCModuleInfo::iterator I = MI->end(), E = MI->begin(); I != E; ) + if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(**--I)) + MP->finishAssembly(M, *MI, *this); + + // Emit llvm.ident metadata in an '.ident' directive. + EmitModuleIdents(M); + + // Emit __morestack address if needed for indirect calls. + if (MMI->usesMorestackAddr()) { + MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant( + getDataLayout(), SectionKind::getReadOnly(), + /*C=*/nullptr); + OutStreamer->SwitchSection(ReadOnlySection); + + MCSymbol *AddrSymbol = + OutContext.getOrCreateSymbol(StringRef("__morestack_addr")); + OutStreamer->EmitLabel(AddrSymbol); + + unsigned PtrSize = M.getDataLayout().getPointerSize(0); + OutStreamer->EmitSymbolValue(GetExternalSymbolSymbol("__morestack"), + PtrSize); + } + + // If we don't have any trampolines, then we don't require stack memory + // to be executable. Some targets have a directive to declare this. + Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); + if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) + if (MCSection *S = MAI->getNonexecutableStackSection(OutContext)) + OutStreamer->SwitchSection(S); + + // Allow the target to emit any magic that it wants at the end of the file, + // after everything else has gone out. 
+ EmitEndOfAsmFile(M); + + delete Mang; Mang = nullptr; + MMI = nullptr; + + OutStreamer->Finish(); + OutStreamer->reset(); + + return false; +} + +MCSymbol *AsmPrinter::getCurExceptionSym() { + if (!CurExceptionSym) + CurExceptionSym = createTempSymbol("exception"); + return CurExceptionSym; +} + +void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { + this->MF = &MF; + // Get the function symbol. + CurrentFnSym = getSymbol(MF.getFunction()); + CurrentFnSymForSize = CurrentFnSym; + CurrentFnBegin = nullptr; + CurExceptionSym = nullptr; + bool NeedsLocalForSize = MAI->needsLocalForSize(); + if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() || + MMI->hasEHFunclets() || NeedsLocalForSize) { + CurrentFnBegin = createTempSymbol("func_begin"); + if (NeedsLocalForSize) + CurrentFnSymForSize = CurrentFnBegin; + } + + if (isVerbose()) + LI = &getAnalysis<MachineLoopInfo>(); +} + +namespace { +// Keep track the alignment, constpool entries per Section. + struct SectionCPs { + MCSection *S; + unsigned Alignment; + SmallVector<unsigned, 4> CPEs; + SectionCPs(MCSection *s, unsigned a) : S(s), Alignment(a) {} + }; +} + +/// EmitConstantPool - Print to the current output stream assembly +/// representations of the constants in the constant pool MCP. This is +/// used to print out constants which have been "spilled to memory" by +/// the code generator. +/// +void AsmPrinter::EmitConstantPool() { + const MachineConstantPool *MCP = MF->getConstantPool(); + const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants(); + if (CP.empty()) return; + + // Calculate sections for constant pool entries. We collect entries to go into + // the same section together to reduce amount of section switch statements. + SmallVector<SectionCPs, 4> CPSections; + for (unsigned i = 0, e = CP.size(); i != e; ++i) { + const MachineConstantPoolEntry &CPE = CP[i]; + unsigned Align = CPE.getAlignment(); + + SectionKind Kind = CPE.getSectionKind(&getDataLayout()); + + const Constant *C = nullptr; + if (!CPE.isMachineConstantPoolEntry()) + C = CPE.Val.ConstVal; + + MCSection *S = + getObjFileLowering().getSectionForConstant(getDataLayout(), Kind, C); + + // The number of sections are small, just do a linear search from the + // last section to the first. + bool Found = false; + unsigned SecIdx = CPSections.size(); + while (SecIdx != 0) { + if (CPSections[--SecIdx].S == S) { + Found = true; + break; + } + } + if (!Found) { + SecIdx = CPSections.size(); + CPSections.push_back(SectionCPs(S, Align)); + } + + if (Align > CPSections[SecIdx].Alignment) + CPSections[SecIdx].Alignment = Align; + CPSections[SecIdx].CPEs.push_back(i); + } + + // Now print stuff into the calculated sections. + const MCSection *CurSection = nullptr; + unsigned Offset = 0; + for (unsigned i = 0, e = CPSections.size(); i != e; ++i) { + for (unsigned j = 0, ee = CPSections[i].CPEs.size(); j != ee; ++j) { + unsigned CPI = CPSections[i].CPEs[j]; + MCSymbol *Sym = GetCPISymbol(CPI); + if (!Sym->isUndefined()) + continue; + + if (CurSection != CPSections[i].S) { + OutStreamer->SwitchSection(CPSections[i].S); + EmitAlignment(Log2_32(CPSections[i].Alignment)); + CurSection = CPSections[i].S; + Offset = 0; + } + + MachineConstantPoolEntry CPE = CP[CPI]; + + // Emit inter-object padding for alignment. 
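+      // Worked example of the mask arithmetic below: with Offset == 5 and a
+      // 4-byte aligned entry, AlignMask == 3 and
+      //   NewOffset == (5 + 3) & ~3 == 8,
+      // so three zero bytes of padding are emitted before the entry.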
+ unsigned AlignMask = CPE.getAlignment() - 1; + unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; + OutStreamer->EmitZeros(NewOffset - Offset); + + Type *Ty = CPE.getType(); + Offset = NewOffset + getDataLayout().getTypeAllocSize(Ty); + + OutStreamer->EmitLabel(Sym); + if (CPE.isMachineConstantPoolEntry()) + EmitMachineConstantPoolValue(CPE.Val.MachineCPVal); + else + EmitGlobalConstant(getDataLayout(), CPE.Val.ConstVal); + } + } +} + +/// EmitJumpTableInfo - Print assembly representations of the jump tables used +/// by the current function to the current output stream. +/// +void AsmPrinter::EmitJumpTableInfo() { + const DataLayout &DL = MF->getDataLayout(); + const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); + if (!MJTI) return; + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_Inline) return; + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + if (JT.empty()) return; + + // Pick the directive to use to print the jump table entries, and switch to + // the appropriate section. + const Function *F = MF->getFunction(); + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + bool JTInDiffSection = !TLOF.shouldPutJumpTableInFunctionSection( + MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32, + *F); + if (JTInDiffSection) { + // Drop it in the readonly section. + MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM); + OutStreamer->SwitchSection(ReadOnlySection); + } + + EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL))); + + // Jump tables in code sections are marked with a data_region directive + // where that's supported. + if (!JTInDiffSection) + OutStreamer->EmitDataRegion(MCDR_DataRegionJT32); + + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + + // If this jump table was deleted, ignore it. + if (JTBBs.empty()) continue; + + // For the EK_LabelDifference32 entry, if using .set avoids a relocation, + /// emit a .set directive for each unique entry. + if (MJTI->getEntryKind() == MachineJumpTableInfo::EK_LabelDifference32 && + MAI->doesSetDirectiveSuppressesReloc()) { + SmallPtrSet<const MachineBasicBlock*, 16> EmittedSets; + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF,JTI,OutContext); + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { + const MachineBasicBlock *MBB = JTBBs[ii]; + if (!EmittedSets.insert(MBB).second) + continue; + + // .set LJTSet, LBB32-base + const MCExpr *LHS = + MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + OutStreamer->EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), + MCBinaryExpr::createSub(LHS, Base, + OutContext)); + } + } + + // On some targets (e.g. Darwin) we want to emit two consecutive labels + // before each jump table. The first label is never referenced, but tells + // the assembler and linker the extents of the jump table object. The + // second label is actually referenced by the code. + if (JTInDiffSection && DL.hasLinkerPrivateGlobalPrefix()) + // FIXME: This doesn't have to have any specific name, just any randomly + // named and numbered 'l' label would work. Simplify GetJTISymbol. 
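+      // Illustrative example (label prefixes are target-dependent): on
+      // Darwin this emits a linker-private label such as "lJTI0_0:"
+      // immediately before the referenced "LJTI0_0:" label below.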
+ OutStreamer->EmitLabel(GetJTISymbol(JTI, true)); + + OutStreamer->EmitLabel(GetJTISymbol(JTI)); + + for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) + EmitJumpTableEntry(MJTI, JTBBs[ii], JTI); + } + if (!JTInDiffSection) + OutStreamer->EmitDataRegion(MCDR_DataRegionEnd); +} + +/// EmitJumpTableEntry - Emit a jump table entry for the specified MBB to the +/// current stream. +void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, + const MachineBasicBlock *MBB, + unsigned UID) const { + assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block"); + const MCExpr *Value = nullptr; + switch (MJTI->getEntryKind()) { + case MachineJumpTableInfo::EK_Inline: + llvm_unreachable("Cannot emit EK_Inline jump table entry"); + case MachineJumpTableInfo::EK_Custom32: + Value = MF->getSubtarget().getTargetLowering()->LowerCustomJumpTableEntry( + MJTI, MBB, UID, OutContext); + break; + case MachineJumpTableInfo::EK_BlockAddress: + // EK_BlockAddress - Each entry is a plain address of block, e.g.: + // .word LBB123 + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + break; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: { + // EK_GPRel32BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gprel32 LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer->EmitGPRel32Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_GPRel64BlockAddress: { + // EK_GPRel64BlockAddress - Each entry is an address of block, encoded + // with a relocation as gp-relative, e.g.: + // .gpdword LBB123 + MCSymbol *MBBSym = MBB->getSymbol(); + OutStreamer->EmitGPRel64Value(MCSymbolRefExpr::create(MBBSym, OutContext)); + return; + } + + case MachineJumpTableInfo::EK_LabelDifference32: { + // Each entry is the address of the block minus the address of the jump + // table. This is used for PIC jump tables where gprel32 is not supported. + // e.g.: + // .word LBB123 - LJTI1_2 + // If the .set directive avoids relocations, this is emitted as: + // .set L4_5_set_123, LBB123 - LJTI1_2 + // .word L4_5_set_123 + if (MAI->doesSetDirectiveSuppressesReloc()) { + Value = MCSymbolRefExpr::create(GetJTSetSymbol(UID, MBB->getNumber()), + OutContext); + break; + } + Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext); + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, UID, OutContext); + Value = MCBinaryExpr::createSub(Value, Base, OutContext); + break; + } + } + + assert(Value && "Unknown entry kind!"); + + unsigned EntrySize = MJTI->getEntrySize(getDataLayout()); + OutStreamer->EmitValue(Value, EntrySize); +} + + +/// EmitSpecialLLVMGlobal - Check to see if the specified global is a +/// special global used by LLVM. If so, emit it and return true, otherwise +/// do nothing and return false. +bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { + if (GV->getName() == "llvm.used") { + if (MAI->hasNoDeadStrip()) // No need to emit this at all. + EmitLLVMUsedList(cast<ConstantArray>(GV->getInitializer())); + return true; + } + + // Ignore debug and non-emitted data. This handles llvm.compiler.used. 
+ if (StringRef(GV->getSection()) == "llvm.metadata" || + GV->hasAvailableExternallyLinkage()) + return true; + + if (!GV->hasAppendingLinkage()) return false; + + assert(GV->hasInitializer() && "Not a special LLVM global!"); + + if (GV->getName() == "llvm.global_ctors") { + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ true); + + if (TM.getRelocationModel() == Reloc::Static && + MAI->hasStaticCtorDtorReferenceInStaticMode()) { + StringRef Sym(".constructors_used"); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); + } + return true; + } + + if (GV->getName() == "llvm.global_dtors") { + EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(), + /* isCtor */ false); + + if (TM.getRelocationModel() == Reloc::Static && + MAI->hasStaticCtorDtorReferenceInStaticMode()) { + StringRef Sym(".destructors_used"); + OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym), + MCSA_Reference); + } + return true; + } + + return false; +} + +/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each +/// global in the specified llvm.used list for which emitUsedDirectiveFor +/// is true, as being used with this directive. +void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { + // Should be an array of 'i8*'. + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + const GlobalValue *GV = + dyn_cast<GlobalValue>(InitList->getOperand(i)->stripPointerCasts()); + if (GV) + OutStreamer->EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); + } +} + +namespace { +struct Structor { + Structor() : Priority(0), Func(nullptr), ComdatKey(nullptr) {} + int Priority; + llvm::Constant *Func; + llvm::GlobalValue *ComdatKey; +}; +} // end namespace + +/// EmitXXStructorList - Emit the ctor or dtor list taking into account the init +/// priority. +void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List, + bool isCtor) { + // Should be an array of '{ int, void ()* }' structs. The first value is the + // init priority. + if (!isa<ConstantArray>(List)) return; + + // Sanity check the structors list. + const ConstantArray *InitList = dyn_cast<ConstantArray>(List); + if (!InitList) return; // Not an array! + StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType()); + // FIXME: Only allow the 3-field form in LLVM 4.0. + if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3) + return; // Not an array of two or three elements! + if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) || + !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). + if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U))) + return; // Not (int, ptr, ptr). + + // Gather the structors in a form that's convenient for sorting by priority. + SmallVector<Structor, 8> Structors; + for (Value *O : InitList->operands()) { + ConstantStruct *CS = dyn_cast<ConstantStruct>(O); + if (!CS) continue; // Malformed. + if (CS->getOperand(1)->isNullValue()) + break; // Found a null terminator, skip the rest. + ConstantInt *Priority = dyn_cast<ConstantInt>(CS->getOperand(0)); + if (!Priority) continue; // Malformed. 
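+    // Worked example (names are hypothetical): an entry such as
+    //   { i32 65535, void ()* @ctor, i8* @key }
+    // yields Priority == 65535, Func == @ctor and ComdatKey == @key below.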
+ Structors.push_back(Structor()); + Structor &S = Structors.back(); + S.Priority = Priority->getLimitedValue(65535); + S.Func = CS->getOperand(1); + if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue()) + S.ComdatKey = dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts()); + } + + // Emit the function pointers in the target-specific order + unsigned Align = Log2_32(DL.getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), + [](const Structor &L, + const Structor &R) { return L.Priority < R.Priority; }); + for (Structor &S : Structors) { + const TargetLoweringObjectFile &Obj = getObjFileLowering(); + const MCSymbol *KeySym = nullptr; + if (GlobalValue *GV = S.ComdatKey) { + if (GV->hasAvailableExternallyLinkage()) + // If the associated variable is available_externally, some other TU + // will provide its dynamic initializer. + continue; + + KeySym = getSymbol(GV); + } + MCSection *OutputSection = + (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) + : Obj.getStaticDtorSection(S.Priority, KeySym)); + OutStreamer->SwitchSection(OutputSection); + if (OutStreamer->getCurrentSection() != OutStreamer->getPreviousSection()) + EmitAlignment(Align); + EmitXXStructor(DL, S.Func); + } +} + +void AsmPrinter::EmitModuleIdents(Module &M) { + if (!MAI->hasIdentDirective()) + return; + + if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.ident metadata entry can have only one operand"); + const MDString *S = cast<MDString>(N->getOperand(0)); + OutStreamer->EmitIdent(S->getString()); + } + } +} + +//===--------------------------------------------------------------------===// +// Emission and print routines +// + +/// EmitInt8 - Emit a byte directive and value. +/// +void AsmPrinter::EmitInt8(int Value) const { + OutStreamer->EmitIntValue(Value, 1); +} + +/// EmitInt16 - Emit a short directive and value. +/// +void AsmPrinter::EmitInt16(int Value) const { + OutStreamer->EmitIntValue(Value, 2); +} + +/// EmitInt32 - Emit a long directive and value. +/// +void AsmPrinter::EmitInt32(int Value) const { + OutStreamer->EmitIntValue(Value, 4); +} + +/// Emit something like ".long Hi-Lo" where the size in bytes of the directive +/// is specified by Size and Hi/Lo specify the labels. This implicitly uses +/// .set if it avoids relocations. +void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, + unsigned Size) const { + OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size); +} + +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// where the size in bytes of the directive is specified by Size and Label +/// specifies the label. This implicitly uses .set if it is available. 
+void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, + unsigned Size, + bool IsSectionRelative) const { + if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { + OutStreamer->EmitCOFFSecRel32(Label); + return; + } + + // Emit Label+Offset (or just Label if Offset is zero) + const MCExpr *Expr = MCSymbolRefExpr::create(Label, OutContext); + if (Offset) + Expr = MCBinaryExpr::createAdd( + Expr, MCConstantExpr::create(Offset, OutContext), OutContext); + + OutStreamer->EmitValue(Expr, Size); +} + +//===----------------------------------------------------------------------===// + +// EmitAlignment - Emit an alignment directive to the specified power of +// two boundary. For example, if you pass in 3 here, you will get an 8 +// byte alignment. If a global value is specified, and if that global has +// an explicit alignment requested, it will override the alignment request +// if required for correctness. +// +void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const { + if (GV) + NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits); + + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. + + assert(NumBits < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + if (getCurrentSection()->getKind().isText()) + OutStreamer->EmitCodeAlignment(1u << NumBits); + else + OutStreamer->EmitValueToAlignment(1u << NumBits); +} + +//===----------------------------------------------------------------------===// +// Constant emission. +//===----------------------------------------------------------------------===// + +const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) { + MCContext &Ctx = OutContext; + + if (CV->isNullValue() || isa<UndefValue>(CV)) + return MCConstantExpr::create(0, Ctx); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) + return MCConstantExpr::create(CI->getZExtValue(), Ctx); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) + return MCSymbolRefExpr::create(getSymbol(GV), Ctx); + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) + return MCSymbolRefExpr::create(GetBlockAddressSymbol(BA), Ctx); + + const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); + if (!CE) { + llvm_unreachable("Unknown constant value to lower!"); + } + + if (const MCExpr *RelocExpr + = getObjFileLowering().getExecutableRelativeSymbol(CE, *Mang, TM)) + return RelocExpr; + + switch (CE->getOpcode()) { + default: + // If the code isn't optimized, there may be outstanding folding + // opportunities. Attempt to fold the expression using DataLayout as a + // last resort before giving up. + if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout())) + if (C != CE) + return lowerConstant(C); + + // Otherwise report the problem to the user. + { + std::string S; + raw_string_ostream OS(S); + OS << "Unsupported expression in static initializer: "; + CE->printAsOperand(OS, /*PrintType=*/false, + !MF ? 
nullptr : MF->getFunction()->getParent()); + report_fatal_error(OS.str()); + } + case Instruction::GetElementPtr: { + // Generate a symbolic expression for the byte address + APInt OffsetAI(getDataLayout().getPointerTypeSizeInBits(CE->getType()), 0); + cast<GEPOperator>(CE)->accumulateConstantOffset(getDataLayout(), OffsetAI); + + const MCExpr *Base = lowerConstant(CE->getOperand(0)); + if (!OffsetAI) + return Base; + + int64_t Offset = OffsetAI.getSExtValue(); + return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), + Ctx); + } + + case Instruction::Trunc: + // We emit the value and depend on the assembler to truncate the generated + // expression properly. This is important for differences between + // blockaddress labels. Since the two labels are in the same function, it + // is reasonable to treat their delta as a 32-bit value. + // FALL THROUGH. + case Instruction::BitCast: + return lowerConstant(CE->getOperand(0)); + + case Instruction::IntToPtr: { + const DataLayout &DL = getDataLayout(); + + // Handle casts to pointers by changing them into casts to the appropriate + // integer type. This promotes constant folding and simplifies this code. + Constant *Op = CE->getOperand(0); + Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), + false/*ZExt*/); + return lowerConstant(Op); + } + + case Instruction::PtrToInt: { + const DataLayout &DL = getDataLayout(); + + // Support only foldable casts to/from pointers that can be eliminated by + // changing the pointer to the appropriately sized integer type. + Constant *Op = CE->getOperand(0); + Type *Ty = CE->getType(); + + const MCExpr *OpExpr = lowerConstant(Op); + + // We can emit the pointer value into this slot if the slot is an + // integer slot equal to the size of the pointer. + if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) + return OpExpr; + + // Otherwise the pointer is smaller than the resultant integer, mask off + // the high bits so we are sure to get a proper truncation if the input is + // a constant expr. + unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); + const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); + return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); + } + + // The MC library also has a right-shift operator, but it isn't consistently + // signed or unsigned between different targets. 
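+  // Illustrative example (assuming pointer-sized integers): a constant
+  // expression such as
+  //   sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i8* @b to i64))
+  // reaches the Sub case below as two symbol references and is emitted as
+  // the assembler expression "a-b".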
+ case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: { + const MCExpr *LHS = lowerConstant(CE->getOperand(0)); + const MCExpr *RHS = lowerConstant(CE->getOperand(1)); + switch (CE->getOpcode()) { + default: llvm_unreachable("Unknown binary operator constant cast expr"); + case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); + case Instruction::Sub: return MCBinaryExpr::createSub(LHS, RHS, Ctx); + case Instruction::Mul: return MCBinaryExpr::createMul(LHS, RHS, Ctx); + case Instruction::SDiv: return MCBinaryExpr::createDiv(LHS, RHS, Ctx); + case Instruction::SRem: return MCBinaryExpr::createMod(LHS, RHS, Ctx); + case Instruction::Shl: return MCBinaryExpr::createShl(LHS, RHS, Ctx); + case Instruction::And: return MCBinaryExpr::createAnd(LHS, RHS, Ctx); + case Instruction::Or: return MCBinaryExpr::createOr (LHS, RHS, Ctx); + case Instruction::Xor: return MCBinaryExpr::createXor(LHS, RHS, Ctx); + } + } + } +} + +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, + AsmPrinter &AP, + const Constant *BaseCV = nullptr, + uint64_t Offset = 0); + +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const ConstantDataSequential *V) { + StringRef Data = V->getRawDataValues(); + assert(!Data.empty() && "Empty aggregates should be CAZ node"); + char C = Data[0]; + for (unsigned i = 1, e = Data.size(); i != e; ++i) + if (Data[i] != C) return -1; + return static_cast<uint8_t>(C); // Ensure 255 is not returned as -1. +} + + +/// isRepeatedByteSequence - Determine whether the given value is +/// composed of a repeated sequence of identical bytes and return the +/// byte value. If it is not a repeated sequence, return -1. +static int isRepeatedByteSequence(const Value *V, const DataLayout &DL) { + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + uint64_t Size = DL.getTypeAllocSizeInBits(V->getType()); + assert(Size % 8 == 0); + + // Extend the element to take zero padding into account. + APInt Value = CI->getValue().zextOrSelf(Size); + if (!Value.isSplat(8)) + return -1; + + return Value.zextOrTrunc(8).getZExtValue(); + } + if (const ConstantArray *CA = dyn_cast<ConstantArray>(V)) { + // Make sure all array elements are sequences of the same repeated + // byte. + assert(CA->getNumOperands() != 0 && "Should be a CAZ"); + Constant *Op0 = CA->getOperand(0); + int Byte = isRepeatedByteSequence(Op0, DL); + if (Byte == -1) + return -1; + + // All array elements must be equal. + for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) + if (CA->getOperand(i) != Op0) + return -1; + return Byte; + } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) + return isRepeatedByteSequence(CDS); + + return -1; +} + +static void emitGlobalConstantDataSequential(const DataLayout &DL, + const ConstantDataSequential *CDS, + AsmPrinter &AP) { + + // See if we can aggregate this into a .fill, if so, emit it as such. + int Value = isRepeatedByteSequence(CDS, DL); + if (Value != -1) { + uint64_t Bytes = DL.getTypeAllocSize(CDS->getType()); + // Don't emit a 1-byte object as a .fill. 
+ if (Bytes > 1) + return AP.OutStreamer->EmitFill(Bytes, Value); + } + + // If this can be emitted with .ascii/.asciz, emit it as such. + if (CDS->isString()) + return AP.OutStreamer->EmitBytes(CDS->getAsString()); + + // Otherwise, emit the values in successive locations. + unsigned ElementByteSize = CDS->getElementByteSize(); + if (isa<IntegerType>(CDS->getElementType())) { + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + if (AP.isVerbose()) + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CDS->getElementAsInteger(i)); + AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), + ElementByteSize); + } + } else { + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast<ConstantFP>(CDS->getElementAsConstant(I)), AP); + } + + unsigned Size = DL.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * + CDS->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer->EmitZeros(Padding); + +} + +static void emitGlobalConstantArray(const DataLayout &DL, + const ConstantArray *CA, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + // See if we can aggregate some values. Make sure it can be + // represented as a series of bytes of the constant value. + int Value = isRepeatedByteSequence(CA, DL); + + if (Value != -1) { + uint64_t Bytes = DL.getTypeAllocSize(CA->getType()); + AP.OutStreamer->EmitFill(Bytes, Value); + } + else { + for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) { + emitGlobalConstantImpl(DL, CA->getOperand(i), AP, BaseCV, Offset); + Offset += DL.getTypeAllocSize(CA->getOperand(i)->getType()); + } + } +} + +static void emitGlobalConstantVector(const DataLayout &DL, + const ConstantVector *CV, AsmPrinter &AP) { + for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) + emitGlobalConstantImpl(DL, CV->getOperand(i), AP); + + unsigned Size = DL.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * + CV->getType()->getNumElements(); + if (unsigned Padding = Size - EmittedSize) + AP.OutStreamer->EmitZeros(Padding); +} + +static void emitGlobalConstantStruct(const DataLayout &DL, + const ConstantStruct *CS, AsmPrinter &AP, + const Constant *BaseCV, uint64_t Offset) { + // Print the fields in successive locations. Pad to align if needed! + unsigned Size = DL.getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL.getStructLayout(CS->getType()); + uint64_t SizeSoFar = 0; + for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { + const Constant *Field = CS->getOperand(i); + + // Print the actual field value. + emitGlobalConstantImpl(DL, Field, AP, BaseCV, Offset + SizeSoFar); + + // Check if padding is needed and insert one or more 0s. + uint64_t FieldSize = DL.getTypeAllocSize(Field->getType()); + uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) + - Layout->getElementOffset(i)) - FieldSize; + SizeSoFar += FieldSize + PadSize; + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. 
+ AP.OutStreamer->EmitZeros(PadSize); + } + assert(SizeSoFar == Layout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { + APInt API = CFP->getValueAPF().bitcastToAPInt(); + + // First print a comment with what we think the original floating-point value + // should have been. + if (AP.isVerbose()) { + SmallString<8> StrVal; + CFP->getValueAPF().toString(StrVal); + + if (CFP->getType()) + CFP->getType()->print(AP.OutStreamer->GetCommentOS()); + else + AP.OutStreamer->GetCommentOS() << "Printing <null> Type"; + AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n'; + } + + // Now iterate through the APInt chunks, emitting them in endian-correct + // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit + // floats). + unsigned NumBytes = API.getBitWidth() / 8; + unsigned TrailingBytes = NumBytes % sizeof(uint64_t); + const uint64_t *p = API.getRawData(); + + // PPC's long double has odd notions of endianness compared to how LLVM + // handles it: p[0] goes first for *big* endian on PPC. + if (AP.getDataLayout().isBigEndian() && !CFP->getType()->isPPC_FP128Ty()) { + int Chunk = API.getNumWords() - 1; + + if (TrailingBytes) + AP.OutStreamer->EmitIntValue(p[Chunk--], TrailingBytes); + + for (; Chunk >= 0; --Chunk) + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + } else { + unsigned Chunk; + for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) + AP.OutStreamer->EmitIntValue(p[Chunk], sizeof(uint64_t)); + + if (TrailingBytes) + AP.OutStreamer->EmitIntValue(p[Chunk], TrailingBytes); + } + + // Emit the tail padding for the long double. + const DataLayout &DL = AP.getDataLayout(); + AP.OutStreamer->EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); +} + +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { + const DataLayout &DL = AP.getDataLayout(); + unsigned BitWidth = CI->getBitWidth(); + + // Copy the value as we may massage the layout for constants whose bit width + // is not a multiple of 64-bits. + APInt Realigned(CI->getValue()); + uint64_t ExtraBits = 0; + unsigned ExtraBitsSize = BitWidth & 63; + + if (ExtraBitsSize) { + // The bit width of the data is not a multiple of 64-bits. + // The extra bits are expected to be at the end of the chunk of the memory. + // Little endian: + // * Nothing to be done, just record the extra bits to emit. + // Big endian: + // * Record the extra bits to emit. + // * Realign the raw data to emit the chunks of 64-bits. + if (DL.isBigEndian()) { + // Basically the structure of the raw data is a chunk of 64-bits cells: + // 0 1 BitWidth / 64 + // [chunk1][chunk2] ... [chunkN]. + // The most significant chunk is chunkN and it should be emitted first. + // However, due to the alignment issue chunkN contains useless bits. + // Realign the chunks so that they contain only useless information: + // ExtraBits 0 1 (BitWidth / 64) - 1 + // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] + ExtraBits = Realigned.getRawData()[0] & + (((uint64_t)-1) >> (64 - ExtraBitsSize)); + Realigned = Realigned.lshr(ExtraBitsSize); + } else + ExtraBits = Realigned.getRawData()[BitWidth / 64]; + } + + // We don't expect assemblers to support integer data directives + // for more than 64 bits, so we emit the data in at most 64-bit + // quantities at a time. 
+ const uint64_t *RawData = Realigned.getRawData(); + for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { + uint64_t Val = DL.isBigEndian() ? RawData[e - i - 1] : RawData[i]; + AP.OutStreamer->EmitIntValue(Val, 8); + } + + if (ExtraBitsSize) { + // Emit the extra bits after the 64-bits chunks. + + // Emit a directive that fills the expected size. + uint64_t Size = AP.getDataLayout().getTypeAllocSize(CI->getType()); + Size -= (BitWidth / 64) * 8; + assert(Size && Size * 8 >= ExtraBitsSize && + (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) + == ExtraBits && "Directive too small for extra bits."); + AP.OutStreamer->EmitIntValue(ExtraBits, Size); + } +} + +/// \brief Transform a not absolute MCExpr containing a reference to a GOT +/// equivalent global, by a target specific GOT pc relative access to the +/// final symbol. +static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME, + const Constant *BaseCst, + uint64_t Offset) { + // The global @foo below illustrates a global that uses a got equivalent. + // + // @bar = global i32 42 + // @gotequiv = private unnamed_addr constant i32* @bar + // @foo = i32 trunc (i64 sub (i64 ptrtoint (i32** @gotequiv to i64), + // i64 ptrtoint (i32* @foo to i64)) + // to i32) + // + // The cstexpr in @foo is converted into the MCExpr `ME`, where we actually + // check whether @foo is suitable to use a GOTPCREL. `ME` is usually in the + // form: + // + // foo = cstexpr, where + // cstexpr := <gotequiv> - "." + <cst> + // cstexpr := <gotequiv> - (<foo> - <offset from @foo base>) + <cst> + // + // After canonicalization by evaluateAsRelocatable `ME` turns into: + // + // cstexpr := <gotequiv> - <foo> + gotpcrelcst, where + // gotpcrelcst := <offset from @foo base> + <cst> + // + MCValue MV; + if (!(*ME)->evaluateAsRelocatable(MV, nullptr, nullptr) || MV.isAbsolute()) + return; + const MCSymbolRefExpr *SymA = MV.getSymA(); + if (!SymA) + return; + + // Check that GOT equivalent symbol is cached. + const MCSymbol *GOTEquivSym = &SymA->getSymbol(); + if (!AP.GlobalGOTEquivs.count(GOTEquivSym)) + return; + + const GlobalValue *BaseGV = dyn_cast_or_null<GlobalValue>(BaseCst); + if (!BaseGV) + return; + + // Check for a valid base symbol + const MCSymbol *BaseSym = AP.getSymbol(BaseGV); + const MCSymbolRefExpr *SymB = MV.getSymB(); + + if (!SymB || BaseSym != &SymB->getSymbol()) + return; + + // Make sure to match: + // + // gotpcrelcst := <offset from @foo base> + <cst> + // + // If gotpcrelcst is positive it means that we can safely fold the pc rel + // displacement into the GOTPCREL. We can also can have an extra offset <cst> + // if the target knows how to encode it. + // + int64_t GOTPCRelCst = Offset + MV.getConstant(); + if (GOTPCRelCst < 0) + return; + if (!AP.getObjFileLowering().supportGOTPCRelWithOffset() && GOTPCRelCst != 0) + return; + + // Emit the GOT PC relative to replace the got equivalent global, i.e.: + // + // bar: + // .long 42 + // gotequiv: + // .quad bar + // foo: + // .long gotequiv - "." 
+ <cst> + // + // is replaced by the target specific equivalent to: + // + // bar: + // .long 42 + // foo: + // .long bar@GOTPCREL+<gotpcrelcst> + // + AsmPrinter::GOTEquivUsePair Result = AP.GlobalGOTEquivs[GOTEquivSym]; + const GlobalVariable *GV = Result.first; + int NumUses = (int)Result.second; + const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0)); + const MCSymbol *FinalSym = AP.getSymbol(FinalGV); + *ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel( + FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer); + + // Update GOT equivalent usage information + --NumUses; + if (NumUses >= 0) + AP.GlobalGOTEquivs[GOTEquivSym] = std::make_pair(GV, NumUses); +} + +static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV, + AsmPrinter &AP, const Constant *BaseCV, + uint64_t Offset) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + + // Globals with sub-elements such as combinations of arrays and structs + // are handled recursively by emitGlobalConstantImpl. Keep track of the + // constant symbol base and the current position with BaseCV and Offset. + if (!BaseCV && CV->hasOneUse()) + BaseCV = dyn_cast<Constant>(CV->user_back()); + + if (isa<ConstantAggregateZero>(CV) || isa<UndefValue>(CV)) + return AP.OutStreamer->EmitZeros(Size); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + switch (Size) { + case 1: + case 2: + case 4: + case 8: + if (AP.isVerbose()) + AP.OutStreamer->GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); + AP.OutStreamer->EmitIntValue(CI->getZExtValue(), Size); + return; + default: + emitGlobalConstantLargeInt(CI, AP); + return; + } + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) + return emitGlobalConstantFP(CFP, AP); + + if (isa<ConstantPointerNull>(CV)) { + AP.OutStreamer->EmitIntValue(0, Size); + return; + } + + if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(CV)) + return emitGlobalConstantDataSequential(DL, CDS, AP); + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) + return emitGlobalConstantArray(DL, CVA, AP, BaseCV, Offset); + + if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) + return emitGlobalConstantStruct(DL, CVS, AP, BaseCV, Offset); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) { + // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of + // vectors). + if (CE->getOpcode() == Instruction::BitCast) + return emitGlobalConstantImpl(DL, CE->getOperand(0), AP); + + if (Size > 8) { + // If the constant expression's size is greater than 64-bits, then we have + // to emit the value in chunks. Try to constant fold the value and emit it + // that way. + Constant *New = ConstantFoldConstantExpression(CE, DL); + if (New && New != CE) + return emitGlobalConstantImpl(DL, New, AP); + } + } + + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) + return emitGlobalConstantVector(DL, V, AP); + + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it + // thread the streamer with EmitValue. + const MCExpr *ME = AP.lowerConstant(CV); + + // Since lowerConstant already folded and got rid of all IR pointer and + // integer casts, detect GOT equivalent accesses by looking into the MCExpr + // directly. + if (AP.getObjFileLowering().supportIndirectSymViaGOTPCRel()) + handleIndirectSymViaGOTPCRel(AP, &ME, BaseCV, Offset); + + AP.OutStreamer->EmitValue(ME, Size); +} + +/// EmitGlobalConstant - Print a general LLVM constant to the .s file. 
+void AsmPrinter::EmitGlobalConstant(const DataLayout &DL, const Constant *CV) { + uint64_t Size = DL.getTypeAllocSize(CV->getType()); + if (Size) + emitGlobalConstantImpl(DL, CV, *this); + else if (MAI->hasSubsectionsViaSymbols()) { + // If the global has zero size, emit a single byte so that two labels don't + // look like they are at the same location. + OutStreamer->EmitIntValue(0, 1); + } +} + +void AsmPrinter::EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) { + // Target doesn't support this yet! + llvm_unreachable("Target does not support EmitMachineConstantPoolValue"); +} + +void AsmPrinter::printOffset(int64_t Offset, raw_ostream &OS) const { + if (Offset > 0) + OS << '+' << Offset; + else if (Offset < 0) + OS << Offset; +} + +//===----------------------------------------------------------------------===// +// Symbol Lowering Routines. +//===----------------------------------------------------------------------===// + +MCSymbol *AsmPrinter::createTempSymbol(const Twine &Name) const { + return OutContext.createTempSymbol(Name, true); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { + return MMI->getAddrLabelSymbol(BA->getBasicBlock()); +} + +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const { + return MMI->getAddrLabelSymbol(BB); +} + +/// GetCPISymbol - Return the symbol for the specified constant pool entry. +MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const { + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + "CPI" + Twine(getFunctionNumber()) + "_" + + Twine(CPID)); +} + +/// GetJTISymbol - Return the symbol for the specified jump table entry. +MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const { + return MF->getJTISymbol(JTID, OutContext, isLinkerPrivate); +} + +/// GetJTSetSymbol - Return the symbol for the specified jump table .set +/// FIXME: privatize to AsmPrinter. +MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const { + const DataLayout &DL = getDataLayout(); + return OutContext.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "_" + + Twine(UID) + "_set_" + Twine(MBBID)); +} + +MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV, + StringRef Suffix) const { + return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang, + TM); +} + +/// Return the MCSymbol for the specified ExternalSymbol. +MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { + SmallString<60> NameStr; + Mangler::getNameWithPrefix(NameStr, Sym, getDataLayout()); + return OutContext.getOrCreateSymbol(NameStr); +} + + + +/// PrintParentLoopComment - Print comments about parent loops of this one. +static void PrintParentLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + if (!Loop) return; + PrintParentLoopComment(OS, Loop->getParentLoop(), FunctionNumber); + OS.indent(Loop->getLoopDepth()*2) + << "Parent Loop BB" << FunctionNumber << "_" + << Loop->getHeader()->getNumber() + << " Depth=" << Loop->getLoopDepth() << '\n'; +} + + +/// PrintChildLoopComment - Print comments about child loops within +/// the loop for this basic block, with nesting. 
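+/// For example, a depth-2 child loop headed by BB3 of function 0 is printed
+/// (indented by its depth) as "Child Loop BB0_3 Depth 2".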
+static void PrintChildLoopComment(raw_ostream &OS, const MachineLoop *Loop, + unsigned FunctionNumber) { + // Add child loop information + for (const MachineLoop *CL : *Loop) { + OS.indent(CL->getLoopDepth()*2) + << "Child Loop BB" << FunctionNumber << "_" + << CL->getHeader()->getNumber() << " Depth " << CL->getLoopDepth() + << '\n'; + PrintChildLoopComment(OS, CL, FunctionNumber); + } +} + +/// emitBasicBlockLoopComments - Pretty-print comments for basic blocks. +static void emitBasicBlockLoopComments(const MachineBasicBlock &MBB, + const MachineLoopInfo *LI, + const AsmPrinter &AP) { + // Add loop depth information + const MachineLoop *Loop = LI->getLoopFor(&MBB); + if (!Loop) return; + + MachineBasicBlock *Header = Loop->getHeader(); + assert(Header && "No header for loop"); + + // If this block is not a loop header, just print out what is the loop header + // and return. + if (Header != &MBB) { + AP.OutStreamer->AddComment(" in Loop: Header=BB" + + Twine(AP.getFunctionNumber())+"_" + + Twine(Loop->getHeader()->getNumber())+ + " Depth="+Twine(Loop->getLoopDepth())); + return; + } + + // Otherwise, it is a loop header. Print out information about child and + // parent loops. + raw_ostream &OS = AP.OutStreamer->GetCommentOS(); + + PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); + + OS << "=>"; + OS.indent(Loop->getLoopDepth()*2-2); + + OS << "This "; + if (Loop->empty()) + OS << "Inner "; + OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; + + PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); +} + + +/// EmitBasicBlockStart - This method prints the label for the specified +/// MachineBasicBlock, an alignment (if present) and a comment describing +/// it if appropriate. +void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const { + // End the previous funclet and start a new one. + if (MBB.isEHFuncletEntry()) { + for (const HandlerInfo &HI : Handlers) { + HI.Handler->endFunclet(); + HI.Handler->beginFunclet(MBB); + } + } + + // Emit an alignment directive for this block, if needed. + if (unsigned Align = MBB.getAlignment()) + EmitAlignment(Align); + + // If the block has its address taken, emit any labels that were used to + // reference the block. It is possible that there is more than one label + // here, because multiple LLVM BB's may have been RAUW'd to this block after + // the references were generated. + if (MBB.hasAddressTaken()) { + const BasicBlock *BB = MBB.getBasicBlock(); + if (isVerbose()) + OutStreamer->AddComment("Block address taken"); + + // MBBs can have their address taken as part of CodeGen without having + // their corresponding BB's address taken in IR + if (BB->hasAddressTaken()) + for (MCSymbol *Sym : MMI->getAddrLabelSymbolToEmit(BB)) + OutStreamer->EmitLabel(Sym); + } + + // Print some verbose block comments. + if (isVerbose()) { + if (const BasicBlock *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + BB->printAsOperand(OutStreamer->GetCommentOS(), + /*PrintType=*/false, BB->getModule()); + OutStreamer->GetCommentOS() << '\n'; + } + } + emitBasicBlockLoopComments(MBB, LI, *this); + } + + // Print the main label for the block. + if (MBB.pred_empty() || + (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) { + if (isVerbose()) { + // NOTE: Want this comment at start of line, don't emit with AddComment. 
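+      // Illustrative output (the comment marker is target-dependent): for
+      // block 7 this prints a line such as "# BB#7:".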
+ OutStreamer->emitRawComment(" BB#" + Twine(MBB.getNumber()) + ":", false); + } + } else { + OutStreamer->EmitLabel(MBB.getSymbol()); + } +} + +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, + bool IsDefinition) const { + MCSymbolAttr Attr = MCSA_Invalid; + + switch (Visibility) { + default: break; + case GlobalValue::HiddenVisibility: + if (IsDefinition) + Attr = MAI->getHiddenVisibilityAttr(); + else + Attr = MAI->getHiddenDeclarationVisibilityAttr(); + break; + case GlobalValue::ProtectedVisibility: + Attr = MAI->getProtectedVisibilityAttr(); + break; + } + + if (Attr != MCSA_Invalid) + OutStreamer->EmitSymbolAttribute(Sym, Attr); +} + +/// isBlockOnlyReachableByFallthough - Return true if the basic block has +/// exactly one predecessor and the control transfer mechanism between +/// the predecessor and this block is a fall-through. +bool AsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isEHPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + if (MBB->pred_size() > 1) + return false; + + // The predecessor has to be immediately before this block. + MachineBasicBlock *Pred = *MBB->pred_begin(); + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; + + // Check the terminators in the previous blocks + for (const auto &MI : Pred->terminators()) { + // If it is not a simple branch, we are in a table somewhere. + if (!MI.isBranch() || MI.isIndirectBranch()) + return false; + + // If we are the operands of one of the branches, this is not a fall + // through. Note that targets with delay slots will usually bundle + // terminators with the delay slot instruction. + for (ConstMIBundleOperands OP(&MI); OP.isValid(); ++OP) { + if (OP->isJTI()) + return false; + if (OP->isMBB() && OP->getMBB() == MBB) + return false; + } + } + + return true; +} + + + +GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) { + if (!S.usesMetadata()) + return nullptr; + + assert(!S.useStatepoints() && "statepoints do not currently support custom" + " stackmap formats, please see the documentation for a description of" + " the default format. If you really need a custom serialized format," + " please file a bug"); + + gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); + gcp_map_type::iterator GCPI = GCMap.find(&S); + if (GCPI != GCMap.end()) + return GCPI->second.get(); + + const char *Name = S.getName().c_str(); + + for (GCMetadataPrinterRegistry::iterator + I = GCMetadataPrinterRegistry::begin(), + E = GCMetadataPrinterRegistry::end(); I != E; ++I) + if (strcmp(Name, I->getName()) == 0) { + std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate(); + GMP->S = &S; + auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP))); + return IterBool.first->second.get(); + } + + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); +} + +/// Pin vtable to this file. 
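+/// Defining the virtual destructor out of line makes it the key function, so
+/// the vtable and type info of AsmPrinterHandler are emitted once here rather
+/// than as weak copies in every translation unit that uses the header.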
+AsmPrinterHandler::~AsmPrinterHandler() {} + +void AsmPrinterHandler::markFunctionEnd() {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp new file mode 100644 index 0000000..504c5d2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -0,0 +1,290 @@ +//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Dwarf emissions parts of AsmPrinter. +// +//===----------------------------------------------------------------------===// + +#include "ByteStreamer.h" +#include "DwarfDebug.h" +#include "DwarfExpression.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +//===----------------------------------------------------------------------===// +// Dwarf Emission Helper Routines +//===----------------------------------------------------------------------===// + +/// EmitSLEB128 - emit the specified signed leb128 value. +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { + if (isVerbose() && Desc) + OutStreamer->AddComment(Desc); + + OutStreamer->EmitSLEB128IntValue(Value); +} + +/// EmitULEB128 - emit the specified unsigned leb128 value. 
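+/// Each byte carries seven payload bits and uses the high bit as a
+/// continuation marker, so e.g. 300 (0x12C) encodes as the two bytes
+/// 0xAC 0x02, while values below 128 fit in a single byte.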
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, + unsigned PadTo) const { + if (isVerbose() && Desc) + OutStreamer->AddComment(Desc); + + OutStreamer->EmitULEB128IntValue(Value, PadTo); +} + +static const char *DecodeDWARFEncoding(unsigned Encoding) { + switch (Encoding) { + case dwarf::DW_EH_PE_absptr: + return "absptr"; + case dwarf::DW_EH_PE_omit: + return "omit"; + case dwarf::DW_EH_PE_pcrel: + return "pcrel"; + case dwarf::DW_EH_PE_udata4: + return "udata4"; + case dwarf::DW_EH_PE_udata8: + return "udata8"; + case dwarf::DW_EH_PE_sdata4: + return "sdata4"; + case dwarf::DW_EH_PE_sdata8: + return "sdata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4: + return "pcrel udata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4: + return "pcrel sdata4"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8: + return "pcrel udata8"; + case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8: + return "pcrel sdata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4 + : + return "indirect pcrel udata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4 + : + return "indirect pcrel sdata4"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8 + : + return "indirect pcrel udata8"; + case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8 + : + return "indirect pcrel sdata8"; + } + + return "<unknown encoding>"; +} + +/// EmitEncodingByte - Emit a .byte 42 directive that corresponds to an +/// encoding. If verbose assembly output is enabled, we output comments +/// describing the encoding. Desc is an optional string saying what the +/// encoding is specifying (e.g. "LSDA"). +void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { + if (isVerbose()) { + if (Desc) + OutStreamer->AddComment(Twine(Desc) + " Encoding = " + + Twine(DecodeDWARFEncoding(Val))); + else + OutStreamer->AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); + } + + OutStreamer->EmitIntValue(Val, 1); +} + +/// GetSizeOfEncodedValue - Return the size of the encoding in bytes. +unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { + if (Encoding == dwarf::DW_EH_PE_omit) + return 0; + + switch (Encoding & 0x07) { + default: + llvm_unreachable("Invalid encoded value."); + case dwarf::DW_EH_PE_absptr: + return MF->getDataLayout().getPointerSize(); + case dwarf::DW_EH_PE_udata2: + return 2; + case dwarf::DW_EH_PE_udata4: + return 4; + case dwarf::DW_EH_PE_udata8: + return 8; + } +} + +void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, + unsigned Encoding) const { + if (GV) { + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); + + const MCExpr *Exp = + TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI, + *OutStreamer); + OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + } else + OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); +} + +void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label, + bool ForceOffset) const { + if (!ForceOffset) { + // On COFF targets, we have to emit the special .secrel32 directive. + if (MAI->needsDwarfSectionOffsetDirective()) { + OutStreamer->EmitCOFFSecRel32(Label); + return; + } + + // If the format uses relocations with dwarf, refer to the symbol directly. + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + OutStreamer->EmitSymbolValue(Label, 4); + return; + } + } + + // Otherwise, emit it as a label difference from the start of the section. 
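+  // With assemblers that fold same-section label differences this comes out
+  // as something like ".long Llabel-Lsection_begin", needing no relocation.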
+ EmitLabelDifference(Label, Label->getSection().getBeginSymbol(), 4); +} + +void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const { + if (MAI->doesDwarfUseRelocationsAcrossSections()) { + emitDwarfSymbolReference(S.getSymbol()); + return; + } + + // Just emit the offset directly; no need for symbol math. + EmitInt32(S.getOffset()); +} + +/// EmitDwarfRegOp - Emit dwarf register operation. +void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer, + const MachineLocation &MLoc) const { + DebugLocDwarfExpression Expr(*MF->getSubtarget().getRegisterInfo(), + getDwarfDebug()->getDwarfVersion(), Streamer); + const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo(); + int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false); + if (Reg < 0) { + // We assume that pointers are always in an addressable register. + if (MLoc.isIndirect()) + // FIXME: We have no reasonable way of handling errors in here. The + // caller might be in the middle of a dwarf expression. We should + // probably assert that Reg >= 0 once debug info generation is more + // mature. + return Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + + // Attempt to find a valid super- or sub-register. + if (!Expr.AddMachineRegPiece(MLoc.getReg())) + Expr.EmitOp(dwarf::DW_OP_nop, + "nop (could not find a dwarf register number)"); + return; + } + + if (MLoc.isIndirect()) + Expr.AddRegIndirect(Reg, MLoc.getOffset()); + else + Expr.AddReg(Reg); +} + +//===----------------------------------------------------------------------===// +// Dwarf Lowering Routines +//===----------------------------------------------------------------------===// + +void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const { + switch (Inst.getOperation()) { + default: + llvm_unreachable("Unexpected instruction"); + case MCCFIInstruction::OpDefCfaOffset: + OutStreamer->EmitCFIDefCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + OutStreamer->EmitCFIAdjustCfaOffset(Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfa: + OutStreamer->EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpDefCfaRegister: + OutStreamer->EmitCFIDefCfaRegister(Inst.getRegister()); + break; + case MCCFIInstruction::OpOffset: + OutStreamer->EmitCFIOffset(Inst.getRegister(), Inst.getOffset()); + break; + case MCCFIInstruction::OpRegister: + OutStreamer->EmitCFIRegister(Inst.getRegister(), Inst.getRegister2()); + break; + case MCCFIInstruction::OpWindowSave: + OutStreamer->EmitCFIWindowSave(); + break; + case MCCFIInstruction::OpSameValue: + OutStreamer->EmitCFISameValue(Inst.getRegister()); + break; + case MCCFIInstruction::OpGnuArgsSize: + OutStreamer->EmitCFIGnuArgsSize(Inst.getOffset()); + break; + case MCCFIInstruction::OpEscape: + OutStreamer->EmitCFIEscape(Inst.getValues()); + break; + } +} + +void AsmPrinter::emitDwarfDIE(const DIE &Die) const { + // Emit the code (index) for the abbreviation. + if (isVerbose()) + OutStreamer->AddComment("Abbrev [" + Twine(Die.getAbbrevNumber()) + "] 0x" + + Twine::utohexstr(Die.getOffset()) + ":0x" + + Twine::utohexstr(Die.getSize()) + " " + + dwarf::TagString(Die.getTag())); + EmitULEB128(Die.getAbbrevNumber()); + + // Emit the DIE attribute values. 
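+  // The forms were fixed when the abbreviation was generated, so only the
+  // raw values are written here; consumers recover the forms from
+  // .debug_abbrev.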
+ for (const auto &V : Die.values()) { + dwarf::Attribute Attr = V.getAttribute(); + assert(V.getForm() && "Too many attributes for DIE (check abbreviation)"); + + if (isVerbose()) { + OutStreamer->AddComment(dwarf::AttributeString(Attr)); + if (Attr == dwarf::DW_AT_accessibility) + OutStreamer->AddComment( + dwarf::AccessibilityString(V.getDIEInteger().getValue())); + } + + // Emit an attribute using the defined form. + V.EmitValue(this); + } + + // Emit the DIE children if any. + if (Die.hasChildren()) { + for (auto &Child : Die.children()) + emitDwarfDIE(Child); + + OutStreamer->AddComment("End Of Children Mark"); + EmitInt8(0); + } +} + +void AsmPrinter::emitDwarfAbbrev(const DIEAbbrev &Abbrev) const { + // Emit the abbreviations code (base 1 index.) + EmitULEB128(Abbrev.getNumber(), "Abbreviation Code"); + + // Emit the abbreviations data. + Abbrev.Emit(this); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h new file mode 100644 index 0000000..e59961f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterHandler.h @@ -0,0 +1,67 @@ +//===-- lib/CodeGen/AsmPrinter/AsmPrinterHandler.h -------------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a generic interface for AsmPrinter handlers, +// like debug and EH info emitters. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_ASMPRINTERHANDLER_H + +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MachineBasicBlock; +class MachineFunction; +class MachineInstr; +class MCSymbol; + +/// \brief Collects and handles AsmPrinter objects required to build debug +/// or EH information. +class AsmPrinterHandler { +public: + virtual ~AsmPrinterHandler(); + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) = 0; + + /// \brief Emit all sections that should come after the content. + virtual void endModule() = 0; + + /// \brief Gather pre-function debug information. + /// Every beginFunction(MF) call should be followed by an endFunction(MF) + /// call. + virtual void beginFunction(const MachineFunction *MF) = 0; + + // \brief Emit any of function marker (like .cfi_endproc). This is called + // before endFunction and cannot switch sections. + virtual void markFunctionEnd(); + + /// \brief Gather post-function debug information. + /// Please note that some AsmPrinter implementations may not call + /// beginFunction at all. + virtual void endFunction(const MachineFunction *MF) = 0; + + /// \brief Emit target-specific EH funclet machinery. + virtual void beginFunclet(const MachineBasicBlock &MBB, + MCSymbol *Sym = nullptr) {} + virtual void endFunclet() {} + + /// \brief Process beginning of an instruction. + virtual void beginInstruction(const MachineInstr *MI) = 0; + + /// \brief Process end of an instruction. 
+ virtual void endInstruction() = 0; +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp new file mode 100644 index 0000000..4171657 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -0,0 +1,573 @@ +//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the inline assembler pieces of the AsmPrinter class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "asm-printer" + +namespace { + struct SrcMgrDiagInfo { + const MDNode *LocInfo; + LLVMContext::InlineAsmDiagHandlerTy DiagHandler; + void *DiagContext; + }; +} + +/// srcMgrDiagHandler - This callback is invoked when the SourceMgr for an +/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo +/// struct above. +static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) { + SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo); + assert(DiagInfo && "Diagnostic context not passed down?"); + + // If the inline asm had metadata associated with it, pull out a location + // cookie corresponding to which line the error occurred on. + unsigned LocCookie = 0; + if (const MDNode *LocInfo = DiagInfo->LocInfo) { + unsigned ErrorLine = Diag.getLineNo()-1; + if (ErrorLine >= LocInfo->getNumOperands()) + ErrorLine = 0; + + if (LocInfo->getNumOperands() != 0) + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocInfo->getOperand(ErrorLine))) + LocCookie = CI->getZExtValue(); + } + + DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie); +} + +/// EmitInlineAsm - Emit a blob of inline asm to the output streamer. +void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI, + const MCTargetOptions &MCOptions, + const MDNode *LocMDNode, + InlineAsm::AsmDialect Dialect) const { + assert(!Str.empty() && "Can't emit empty inline asm block"); + + // Remember if the buffer is nul terminated or not so we can avoid a copy. 
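+  // MemoryBuffer can reference the string in place only when it is already
+  // nul terminated; otherwise we fall back to getMemBufferCopy below.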
+ bool isNullTerminated = Str.back() == 0; + if (isNullTerminated) + Str = Str.substr(0, Str.size()-1); + + // If the output streamer does not have mature MC support or the integrated + // assembler has been disabled, just emit the blob textually. + // Otherwise parse the asm and emit it via MC support. + // This is useful in case the asm parser doesn't handle something but the + // system assembler does. + const MCAsmInfo *MCAI = TM.getMCAsmInfo(); + assert(MCAI && "No MCAsmInfo"); + if (!MCAI->useIntegratedAssembler() && + !OutStreamer->isIntegratedAssemblerRequired()) { + emitInlineAsmStart(); + OutStreamer->EmitRawText(Str); + emitInlineAsmEnd(STI, nullptr); + return; + } + + SourceMgr SrcMgr; + SrcMgrDiagInfo DiagInfo; + + // If the current LLVMContext has an inline asm handler, set it in SourceMgr. + LLVMContext &LLVMCtx = MMI->getModule()->getContext(); + bool HasDiagHandler = false; + if (LLVMCtx.getInlineAsmDiagnosticHandler() != nullptr) { + // If the source manager has an issue, we arrange for srcMgrDiagHandler + // to be invoked, getting DiagInfo passed into it. + DiagInfo.LocInfo = LocMDNode; + DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler(); + DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext(); + SrcMgr.setDiagHandler(srcMgrDiagHandler, &DiagInfo); + HasDiagHandler = true; + } + + std::unique_ptr<MemoryBuffer> Buffer; + if (isNullTerminated) + Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>"); + else + Buffer = MemoryBuffer::getMemBufferCopy(Str, "<inline asm>"); + + // Tell SrcMgr about this buffer, it takes ownership of the buffer. + SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc()); + + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, OutContext, *OutStreamer, *MAI)); + + // We create a new MCInstrInfo here since we might be at the module level + // and not have a MachineFunction to initialize the TargetInstrInfo from and + // we only need MCInstrInfo for asm parsing. We create one unconditionally + // because it's not subtarget dependent. + std::unique_ptr<MCInstrInfo> MII(TM.getTarget().createMCInstrInfo()); + std::unique_ptr<MCTargetAsmParser> TAP(TM.getTarget().createMCAsmParser( + STI, *Parser, *MII, MCOptions)); + if (!TAP) + report_fatal_error("Inline asm not supported by this streamer because" + " we don't have an asm parser for this target\n"); + Parser->setAssemblerDialect(Dialect); + Parser->setTargetParser(*TAP.get()); + if (MF) { + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + TAP->SetFrameRegister(TRI->getFrameRegister(*MF)); + } + + emitInlineAsmStart(); + // Don't implicitly switch to the text section before the asm. + int Res = Parser->Run(/*NoInitialTextSection*/ true, + /*NoFinalize*/ true); + emitInlineAsmEnd(STI, &TAP->getSTI()); + if (Res && !HasDiagHandler) + report_fatal_error("Error parsing inline asm\n"); +} + +static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + AsmPrinter *AP, unsigned LocCookie, + raw_ostream &OS) { + // Switch to the inline assembly variant. + OS << "\t.intel_syntax\n\t"; + + const char *LastEmitted = AsmStr; // One past the last character emitted. + unsigned NumOperands = MI->getNumOperands(); + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. 
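+      // Scan ahead to the next character with special meaning and copy the
+      // literal text before it straight through to the output.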
+ const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. + switch (*LastEmitted) { + default: Done = false; break; + case '$': + ++LastEmitted; // Consume second '$' character. + break; + } + if (Done) break; + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. + + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ nullptr, OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + /*Modifier*/ nullptr, OS); + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + break; + } + } + } + OS << "\n\t.att_syntax\n" << (char)0; // null terminate string. +} + +static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI, + MachineModuleInfo *MMI, int InlineAsmVariant, + int AsmPrinterVariant, AsmPrinter *AP, + unsigned LocCookie, raw_ostream &OS) { + int CurVariant = -1; // The number of the {.|.|.} region we are in. + const char *LastEmitted = AsmStr; // One past the last character emitted. + unsigned NumOperands = MI->getNumOperands(); + + OS << '\t'; + + while (*LastEmitted) { + switch (*LastEmitted) { + default: { + // Not a special case, emit the string section literally. + const char *LiteralEnd = LastEmitted+1; + while (*LiteralEnd && *LiteralEnd != '{' && *LiteralEnd != '|' && + *LiteralEnd != '}' && *LiteralEnd != '$' && *LiteralEnd != '\n') + ++LiteralEnd; + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS.write(LastEmitted, LiteralEnd-LastEmitted); + LastEmitted = LiteralEnd; + break; + } + case '\n': + ++LastEmitted; // Consume newline character. + OS << '\n'; // Indent code with newline. + break; + case '$': { + ++LastEmitted; // Consume '$' character. + bool Done = true; + + // Handle escapes. 
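+      // "$$" emits a literal '$', while "$(", "$|" and "$)" bracket dialect
+      // variants: a string such as "fadd$(s$|d$) $0" keeps the "s" spelling
+      // for assembler variant 0 and the "d" spelling for variant 1.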
+ switch (*LastEmitted) { + default: Done = false; break; + case '$': // $$ -> $ + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) + OS << '$'; + ++LastEmitted; // Consume second '$' character. + break; + case '(': // $( -> same as GCC's { character. + ++LastEmitted; // Consume '(' character. + if (CurVariant != -1) + report_fatal_error("Nested variants found in inline asm string: '" + + Twine(AsmStr) + "'"); + CurVariant = 0; // We're in the first variant now. + break; + case '|': + ++LastEmitted; // consume '|' character. + if (CurVariant == -1) + OS << '|'; // this is gcc's behavior for | outside a variant + else + ++CurVariant; // We're in the next variant. + break; + case ')': // $) -> same as GCC's } char. + ++LastEmitted; // consume ')' character. + if (CurVariant == -1) + OS << '}'; // this is gcc's behavior for } outside a variant + else + CurVariant = -1; + break; + } + if (Done) break; + + bool HasCurlyBraces = false; + if (*LastEmitted == '{') { // ${variable} + ++LastEmitted; // Consume '{' character. + HasCurlyBraces = true; + } + + // If we have ${:foo}, then this is not a real operand reference, it is a + // "magic" string reference, just like in .td files. Arrange to call + // PrintSpecial. + if (HasCurlyBraces && *LastEmitted == ':') { + ++LastEmitted; + const char *StrStart = LastEmitted; + const char *StrEnd = strchr(StrStart, '}'); + if (!StrEnd) + report_fatal_error("Unterminated ${:foo} operand in inline asm" + " string: '" + Twine(AsmStr) + "'"); + + std::string Val(StrStart, StrEnd); + AP->PrintSpecial(MI, OS, Val.c_str()); + LastEmitted = StrEnd+1; + break; + } + + const char *IDStart = LastEmitted; + const char *IDEnd = IDStart; + while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd; + + unsigned Val; + if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val)) + report_fatal_error("Bad $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + LastEmitted = IDEnd; + + char Modifier[2] = { 0, 0 }; + + if (HasCurlyBraces) { + // If we have curly braces, check for a modifier character. This + // supports syntax like ${0:u}, which correspond to "%u0" in GCC asm. + if (*LastEmitted == ':') { + ++LastEmitted; // Consume ':' character. + if (*LastEmitted == 0) + report_fatal_error("Bad ${:} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + + Modifier[0] = *LastEmitted; + ++LastEmitted; // Consume modifier character. + } + + if (*LastEmitted != '}') + report_fatal_error("Bad ${} expression in inline asm string: '" + + Twine(AsmStr) + "'"); + ++LastEmitted; // Consume '}' character. + } + + if (Val >= NumOperands-1) + report_fatal_error("Invalid $ operand number in inline asm string: '" + + Twine(AsmStr) + "'"); + + // Okay, we finally have a value number. Ask the target to print this + // operand! + if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + + bool Error = false; + + // Scan to find the machine operand number for the operand. + for (; Val; --Val) { + if (OpNo >= MI->getNumOperands()) break; + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + OpNo += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + + // We may have a location metadata attached to the end of the + // instruction, and at no point should see metadata at any + // other point while processing. It's an error if so. + if (OpNo >= MI->getNumOperands() || + MI->getOperand(OpNo).isMetadata()) { + Error = true; + } else { + unsigned OpFlags = MI->getOperand(OpNo).getImm(); + ++OpNo; // Skip over the ID number. 
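+        // A "${N:l}" reference asks for the bare label of a basic-block
+        // operand; everything else goes through the target's PrintAsmOperand
+        // or PrintAsmMemoryOperand hook below.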
+ + if (Modifier[0] == 'l') { // Labels are target independent. + // FIXME: What if the operand isn't an MBB, report error? + const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol(); + Sym->print(OS, AP->MAI); + } else { + if (InlineAsm::isMemKind(OpFlags)) { + Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant, + Modifier[0] ? Modifier : nullptr, + OS); + } else { + Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant, + Modifier[0] ? Modifier : nullptr, OS); + } + } + } + if (Error) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "invalid operand in inline asm: '" << AsmStr << "'"; + MMI->getModule()->getContext().emitError(LocCookie, Msg.str()); + } + } + break; + } + } + } + OS << '\n' << (char)0; // null terminate string. +} + +/// EmitInlineAsm - This method formats and emits the specified machine +/// instruction that is an inline asm. +void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { + assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms"); + + // Count the number of register definitions to find the asm string. + unsigned NumDefs = 0; + for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef(); + ++NumDefs) + assert(NumDefs != MI->getNumOperands()-2 && "No asm string?"); + + assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?"); + + // Disassemble the AsmStr, printing out the literal pieces, the operands, etc. + const char *AsmStr = MI->getOperand(NumDefs).getSymbolName(); + + // If this asmstr is empty, just print the #APP/#NOAPP markers. + // These are useful to see where empty asm's wound up. + if (AsmStr[0] == 0) { + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); + return; + } + + // Emit the #APP start marker. This has to happen even if verbose-asm isn't + // enabled, so we use emitRawComment. + OutStreamer->emitRawComment(MAI->getInlineAsmStart()); + + // Get the !srcloc metadata node if we have it, and decode the loc cookie from + // it. + unsigned LocCookie = 0; + const MDNode *LocMD = nullptr; + for (unsigned i = MI->getNumOperands(); i != 0; --i) { + if (MI->getOperand(i-1).isMetadata() && + (LocMD = MI->getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + // Emit the inline asm to a temporary string so we can emit it through + // EmitInlineAsm. + SmallString<256> StringData; + raw_svector_ostream OS(StringData); + + // The variant of the current asmprinter. + int AsmPrinterVariant = MAI->getAssemblerDialect(); + InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect(); + AsmPrinter *AP = const_cast<AsmPrinter*>(this); + if (InlineAsmVariant == InlineAsm::AD_ATT) + EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant, + AP, LocCookie, OS); + else + EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS); + + // Reset SanitizeAddress based on the function's attribute. + MCTargetOptions MCOptions = TM.Options.MCOptions; + MCOptions.SanitizeAddress = + MF->getFunction()->hasFnAttribute(Attribute::SanitizeAddress); + + EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD, + MI->getInlineAsmDialect()); + + // Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't + // enabled, so we use emitRawComment. 
+ OutStreamer->emitRawComment(MAI->getInlineAsmEnd()); +} + + +/// PrintSpecial - Print information related to the specified machine instr +/// that is independent of the operand, and may be independent of the instr +/// itself. This can be useful for portably encoding the comment character +/// or other bits of target-specific knowledge into the asmstrings. The +/// syntax used is ${:comment}. Targets can override this to add support +/// for their own strange codes. +void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS, + const char *Code) const { + if (!strcmp(Code, "private")) { + const DataLayout &DL = MF->getDataLayout(); + OS << DL.getPrivateGlobalPrefix(); + } else if (!strcmp(Code, "comment")) { + OS << MAI->getCommentString(); + } else if (!strcmp(Code, "uid")) { + // Comparing the address of MI isn't sufficient, because machineinstrs may + // be allocated to the same address across functions. + + // If this is a new LastFn instruction, bump the counter. + if (LastMI != MI || LastFn != getFunctionNumber()) { + ++Counter; + LastMI = MI; + LastFn = getFunctionNumber(); + } + OS << Counter; + } else { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Unknown special formatter '" << Code + << "' for machine instr: " << *MI; + report_fatal_error(Msg.str()); + } +} + +/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM +/// instruction, using the specified assembler variant. Targets should +/// override this to format as appropriate. +bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, const char *ExtraCode, + raw_ostream &O) { + // Does this asm operand have a single letter operand modifier? + if (ExtraCode && ExtraCode[0]) { + if (ExtraCode[1] != 0) return true; // Unknown modifier. + + const MachineOperand &MO = MI->getOperand(OpNo); + switch (ExtraCode[0]) { + default: + return true; // Unknown modifier. + case 'c': // Substitute immediate value without immediate syntax + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << MO.getImm(); + return false; + case 'n': // Negate the immediate constant. + if (MO.getType() != MachineOperand::MO_Immediate) + return true; + O << -MO.getImm(); + return false; + } + } + return true; +} + +bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, + unsigned AsmVariant, + const char *ExtraCode, raw_ostream &O) { + // Target doesn't support this yet! + return true; +} + +void AsmPrinter::emitInlineAsmStart() const {} + +void AsmPrinter::emitInlineAsmEnd(const MCSubtargetInfo &StartInfo, + const MCSubtargetInfo *EndInfo) const {} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h new file mode 100644 index 0000000..df1997b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ByteStreamer.h @@ -0,0 +1,111 @@ +//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a class that can take bytes that would normally be +// streamed via the AsmPrinter. 
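+// The implementations below either forward those bytes to an AsmPrinter,
+// feed them into a DIE hash, or buffer them (with optional comments) for
+// later emission.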
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_BYTESTREAMER_H + +#include "DIEHash.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include <string> + +namespace llvm { +class ByteStreamer { + protected: + ~ByteStreamer() = default; + ByteStreamer(const ByteStreamer&) = default; + ByteStreamer() = default; + + public: + // For now we're just handling the calls we need for dwarf emission/hashing. + virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0; + virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0; + virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0; +}; + +class APByteStreamer final : public ByteStreamer { +private: + AsmPrinter &AP; + +public: + APByteStreamer(AsmPrinter &Asm) : AP(Asm) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + AP.OutStreamer->AddComment(Comment); + AP.EmitInt8(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer->AddComment(Comment); + AP.EmitSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + AP.OutStreamer->AddComment(Comment); + AP.EmitULEB128(DWord); + } +}; + +class HashingByteStreamer final : public ByteStreamer { + private: + DIEHash &Hash; + public: + HashingByteStreamer(DIEHash &H) : Hash(H) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Hash.update(Byte); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addSLEB128(DWord); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + Hash.addULEB128(DWord); + } +}; + +class BufferByteStreamer final : public ByteStreamer { +private: + SmallVectorImpl<char> &Buffer; + SmallVectorImpl<std::string> &Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. If false, comments passed in to + /// the emit methods will be ignored. + bool GenerateComments; + +public: + BufferByteStreamer(SmallVectorImpl<char> &Buffer, + SmallVectorImpl<std::string> &Comments, + bool GenerateComments) + : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {} + void EmitInt8(uint8_t Byte, const Twine &Comment) override { + Buffer.push_back(Byte); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitSLEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeSLEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } + void EmitULEB128(uint64_t DWord, const Twine &Comment) override { + raw_svector_ostream OSE(Buffer); + encodeULEB128(DWord, OSE); + if (GenerateComments) + Comments.push_back(Comment.str()); + } +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp new file mode 100644 index 0000000..bf794f7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp @@ -0,0 +1,614 @@ +//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// Data structures for DWARF info entries. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DIE.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// DIEAbbrevData Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const { + // Explicitly cast to an integer type for which FoldingSetNodeID has + // overloads. Otherwise MSVC 2010 thinks this call is ambiguous. + ID.AddInteger(unsigned(Attribute)); + ID.AddInteger(unsigned(Form)); +} + +//===----------------------------------------------------------------------===// +// DIEAbbrev Implementation +//===----------------------------------------------------------------------===// + +/// Profile - Used to gather unique data for the abbreviation folding set. +/// +void DIEAbbrev::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(unsigned(Tag)); + ID.AddInteger(unsigned(Children)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) + Data[i].Profile(ID); +} + +/// Emit - Print the abbreviation using the specified asm printer. +/// +void DIEAbbrev::Emit(const AsmPrinter *AP) const { + // Emit its Dwarf tag type. + AP->EmitULEB128(Tag, dwarf::TagString(Tag)); + + // Emit whether it has children DIEs. + AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children)); + + // For each attribute description. + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + const DIEAbbrevData &AttrData = Data[i]; + + // Emit attribute type. + AP->EmitULEB128(AttrData.getAttribute(), + dwarf::AttributeString(AttrData.getAttribute())); + + // Emit form type. + AP->EmitULEB128(AttrData.getForm(), + dwarf::FormEncodingString(AttrData.getForm())); + } + + // Mark end of abbreviation. + AP->EmitULEB128(0, "EOM(1)"); + AP->EmitULEB128(0, "EOM(2)"); +} + +LLVM_DUMP_METHOD +void DIEAbbrev::print(raw_ostream &O) { + O << "Abbreviation @" + << format("0x%lx", (long)(intptr_t)this) + << " " + << dwarf::TagString(Tag) + << " " + << dwarf::ChildrenString(Children) + << '\n'; + + for (unsigned i = 0, N = Data.size(); i < N; ++i) { + O << " " + << dwarf::AttributeString(Data[i].getAttribute()) + << " " + << dwarf::FormEncodingString(Data[i].getForm()) + << '\n'; + } +} + +LLVM_DUMP_METHOD +void DIEAbbrev::dump() { print(dbgs()); } + +DIEAbbrev DIE::generateAbbrev() const { + DIEAbbrev Abbrev(Tag, hasChildren()); + for (const DIEValue &V : values()) + Abbrev.AddAttribute(V.getAttribute(), V.getForm()); + return Abbrev; +} + +/// Climb up the parent chain to get the unit DIE to which this DIE +/// belongs. 
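+/// Asserts if the DIE has not been parented into a unit yet; use
+/// getUnitOrNull below to probe without asserting.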
+const DIE *DIE::getUnit() const { + const DIE *Cu = getUnitOrNull(); + assert(Cu && "We should not have orphaned DIEs."); + return Cu; +} + +/// Climb up the parent chain to get the unit DIE this DIE belongs +/// to. Return NULL if DIE is not added to an owner yet. +const DIE *DIE::getUnitOrNull() const { + const DIE *p = this; + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit || + p->getTag() == dwarf::DW_TAG_type_unit) + return p; + p = p->getParent(); + } + return nullptr; +} + +DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const { + // Iterate through all the attributes until we find the one we're + // looking for, if we can't find it return NULL. + for (const auto &V : values()) + if (V.getAttribute() == Attribute) + return V; + return DIEValue(); +} + +LLVM_DUMP_METHOD +static void printValues(raw_ostream &O, const DIEValueList &Values, + StringRef Type, unsigned Size, unsigned IndentCount) { + O << Type << ": Size: " << Size << "\n"; + + unsigned I = 0; + const std::string Indent(IndentCount, ' '); + for (const auto &V : Values.values()) { + O << Indent; + O << "Blk[" << I++ << "]"; + O << " " << dwarf::FormEncodingString(V.getForm()) << " "; + V.print(O); + O << "\n"; + } +} + +LLVM_DUMP_METHOD +void DIE::print(raw_ostream &O, unsigned IndentCount) const { + const std::string Indent(IndentCount, ' '); + O << Indent << "Die: " << format("0x%lx", (long)(intptr_t) this) + << ", Offset: " << Offset << ", Size: " << Size << "\n"; + + O << Indent << dwarf::TagString(getTag()) << " " + << dwarf::ChildrenString(hasChildren()) << "\n"; + + IndentCount += 2; + for (const auto &V : values()) { + O << Indent; + O << dwarf::AttributeString(V.getAttribute()); + O << " " << dwarf::FormEncodingString(V.getForm()) << " "; + V.print(O); + O << "\n"; + } + IndentCount -= 2; + + for (const auto &Child : children()) + Child.print(O, IndentCount + 4); + + O << "\n"; +} + +LLVM_DUMP_METHOD +void DIE::dump() { + print(dbgs()); +} + +void DIEValue::EmitValue(const AsmPrinter *AP) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().EmitValue(AP, Form); \ + break; +#include "llvm/CodeGen/DIEValue.def" + } +} + +unsigned DIEValue::SizeOf(const AsmPrinter *AP) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + return getDIE##T().SizeOf(AP, Form); +#include "llvm/CodeGen/DIEValue.def" + } + llvm_unreachable("Unknown DIE kind"); +} + +LLVM_DUMP_METHOD +void DIEValue::print(raw_ostream &O) const { + switch (Ty) { + case isNone: + llvm_unreachable("Expected valid DIEValue"); +#define HANDLE_DIEVALUE(T) \ + case is##T: \ + getDIE##T().print(O); \ + break; +#include "llvm/CodeGen/DIEValue.def" + } +} + +LLVM_DUMP_METHOD +void DIEValue::dump() const { + print(dbgs()); +} + +//===----------------------------------------------------------------------===// +// DIEInteger Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit integer of appropriate size. +/// +void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + unsigned Size = ~0U; + switch (Form) { + case dwarf::DW_FORM_flag_present: + // Emit something to keep the lines and comments in sync. + // FIXME: Is there a better way to do this? 
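+    // DW_FORM_flag_present carries its value implicitly in the abbreviation,
+    // so no data bytes are written for it.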
+ Asm->OutStreamer->AddBlankLine(); + return; + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: Size = 1; break; + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: Size = 2; break; + case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: Size = 4; break; + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru + case dwarf::DW_FORM_data8: Size = 8; break; + case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: + Size = Asm->getPointerSize(); + break; + case dwarf::DW_FORM_ref_addr: + Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr); + break; + default: llvm_unreachable("DIE Value form not supported yet"); + } + Asm->OutStreamer->EmitIntValue(Integer, Size); +} + +/// SizeOf - Determine size of integer value in bytes. +/// +unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_flag_present: return 0; + case dwarf::DW_FORM_flag: // Fall thru + case dwarf::DW_FORM_ref1: // Fall thru + case dwarf::DW_FORM_data1: return sizeof(int8_t); + case dwarf::DW_FORM_ref2: // Fall thru + case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_sec_offset: // Fall thru + case dwarf::DW_FORM_strp: // Fall thru + case dwarf::DW_FORM_ref4: // Fall thru + case dwarf::DW_FORM_data4: return sizeof(int32_t); + case dwarf::DW_FORM_ref8: // Fall thru + case dwarf::DW_FORM_ref_sig8: // Fall thru + case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer); + case dwarf::DW_FORM_udata: return getULEB128Size(Integer); + case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: + return AP->getPointerSize(); + case dwarf::DW_FORM_ref_addr: + if (AP->OutStreamer->getContext().getDwarfVersion() == 2) + return AP->getPointerSize(); + return sizeof(int32_t); + default: llvm_unreachable("DIE Value form not supported yet"); + } +} + +LLVM_DUMP_METHOD +void DIEInteger::print(raw_ostream &O) const { + O << "Int: " << (int64_t)Integer << " 0x"; + O.write_hex(Integer); +} + +//===----------------------------------------------------------------------===// +// DIEExpr Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit expression value. +/// +void DIEExpr::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->OutStreamer->EmitValue(Expr, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of expression value in bytes. 
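+/// data4, sec_offset and strp are fixed 4-byte section offsets in 32-bit
+/// DWARF; any other form falls back to the target pointer size.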
+/// +unsigned DIEExpr::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getPointerSize(); +} + +LLVM_DUMP_METHOD +void DIEExpr::print(raw_ostream &O) const { O << "Expr: " << *Expr; } + +//===----------------------------------------------------------------------===// +// DIELabel Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit label value. +/// +void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelReference(Label, SizeOf(AP, Form), + Form == dwarf::DW_FORM_strp || + Form == dwarf::DW_FORM_sec_offset || + Form == dwarf::DW_FORM_ref_addr); +} + +/// SizeOf - Determine size of label value in bytes. +/// +unsigned DIELabel::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getPointerSize(); +} + +LLVM_DUMP_METHOD +void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); } + +//===----------------------------------------------------------------------===// +// DIEDelta Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit delta value. +/// +void DIEDelta::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form)); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEDelta::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; + return AP->getPointerSize(); +} + +LLVM_DUMP_METHOD +void DIEDelta::print(raw_ostream &O) const { + O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName(); +} + +//===----------------------------------------------------------------------===// +// DIEString Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit string value. +/// +void DIEString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) { + DIEInteger(S.getIndex()).EmitValue(AP, Form); + return; + } + + // Relocatable symbol. + assert(Form == dwarf::DW_FORM_strp); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) { + DIELabel(S.getSymbol()).EmitValue(AP, Form); + return; + } + + // Offset into symbol table. + DIEInteger(S.getOffset()).EmitValue(AP, Form); +} + +/// SizeOf - Determine size of delta value in bytes. +/// +unsigned DIEString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + assert( + (Form == dwarf::DW_FORM_strp || Form == dwarf::DW_FORM_GNU_str_index) && + "Expected valid string form"); + + // Index of string in symbol table. + if (Form == dwarf::DW_FORM_GNU_str_index) + return DIEInteger(S.getIndex()).SizeOf(AP, Form); + + // Relocatable symbol. + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + return DIELabel(S.getSymbol()).SizeOf(AP, Form); + + // Offset into symbol table. 
+ return DIEInteger(S.getOffset()).SizeOf(AP, Form); +} + +LLVM_DUMP_METHOD +void DIEString::print(raw_ostream &O) const { + O << "String: " << S.getString(); +} + +//===----------------------------------------------------------------------===// +// DIEEntry Implementation +//===----------------------------------------------------------------------===// + +/// EmitValue - Emit debug information entry offset. +/// +void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + + if (Form == dwarf::DW_FORM_ref_addr) { + const DwarfDebug *DD = AP->getDwarfDebug(); + unsigned Addr = Entry->getOffset(); + assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations."); + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Entry->getOffset() returns the offset from start of the + // compile unit. + DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) + AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr, + DIEEntry::getRefAddrSize(AP)); + else + AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP)); + } else + AP->EmitInt32(Entry->getOffset()); +} + +unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) { + // DWARF4: References that use the attribute form DW_FORM_ref_addr are + // specified to be four bytes in the DWARF 32-bit format and eight bytes + // in the DWARF 64-bit format, while DWARF Version 2 specifies that such + // references have the same size as an address on the target system. + const DwarfDebug *DD = AP->getDwarfDebug(); + assert(DD && "Expected Dwarf Debug info to be available"); + if (DD->getDwarfVersion() == 2) + return AP->getPointerSize(); + return sizeof(int32_t); +} + +LLVM_DUMP_METHOD +void DIEEntry::print(raw_ostream &O) const { + O << format("Die: 0x%lx", (long)(intptr_t)&Entry); +} + +//===----------------------------------------------------------------------===// +// DIETypeSignature Implementation +//===----------------------------------------------------------------------===// +void DIETypeSignature::EmitValue(const AsmPrinter *Asm, + dwarf::Form Form) const { + assert(Form == dwarf::DW_FORM_ref_sig8); + Asm->OutStreamer->EmitIntValue(Unit->getTypeSignature(), 8); +} + +LLVM_DUMP_METHOD +void DIETypeSignature::print(raw_ostream &O) const { + O << format("Type Unit: 0x%lx", Unit->getTypeSignature()); +} + +//===----------------------------------------------------------------------===// +// DIELoc Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the location expression. +/// +unsigned DIELoc::ComputeSize(const AsmPrinter *AP) const { + if (!Size) { + for (const auto &V : values()) + Size += V.SizeOf(AP); + } + + return Size; +} + +/// EmitValue - Emit location data. +/// +void DIELoc::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + Asm->EmitULEB128(Size); break; + } + + for (const auto &V : values()) + V.EmitValue(Asm); +} + +/// SizeOf - Determine size of location data in bytes. 
+/// +unsigned DIELoc::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: + case dwarf::DW_FORM_exprloc: + return Size + getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +LLVM_DUMP_METHOD +void DIELoc::print(raw_ostream &O) const { + printValues(O, *this, "ExprLoc", Size, 5); +} + +//===----------------------------------------------------------------------===// +// DIEBlock Implementation +//===----------------------------------------------------------------------===// + +/// ComputeSize - calculate the size of the block. +/// +unsigned DIEBlock::ComputeSize(const AsmPrinter *AP) const { + if (!Size) { + for (const auto &V : values()) + Size += V.SizeOf(AP); + } + + return Size; +} + +/// EmitValue - Emit block data. +/// +void DIEBlock::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const { + switch (Form) { + default: llvm_unreachable("Improper form for block"); + case dwarf::DW_FORM_block1: Asm->EmitInt8(Size); break; + case dwarf::DW_FORM_block2: Asm->EmitInt16(Size); break; + case dwarf::DW_FORM_block4: Asm->EmitInt32(Size); break; + case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; + } + + for (const auto &V : values()) + V.EmitValue(Asm); +} + +/// SizeOf - Determine size of block data in bytes. +/// +unsigned DIEBlock::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + switch (Form) { + case dwarf::DW_FORM_block1: return Size + sizeof(int8_t); + case dwarf::DW_FORM_block2: return Size + sizeof(int16_t); + case dwarf::DW_FORM_block4: return Size + sizeof(int32_t); + case dwarf::DW_FORM_block: return Size + getULEB128Size(Size); + default: llvm_unreachable("Improper form for block"); + } +} + +LLVM_DUMP_METHOD +void DIEBlock::print(raw_ostream &O) const { + printValues(O, *this, "Blk", Size, 5); +} + +//===----------------------------------------------------------------------===// +// DIELocList Implementation +//===----------------------------------------------------------------------===// + +unsigned DIELocList::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const { + if (Form == dwarf::DW_FORM_data4) + return 4; + if (Form == dwarf::DW_FORM_sec_offset) + return 4; + return AP->getPointerSize(); +} + +/// EmitValue - Emit label value. +/// +void DIELocList::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const { + DwarfDebug *DD = AP->getDwarfDebug(); + MCSymbol *Label = DD->getDebugLocs().getList(Index).Label; + AP->emitDwarfSymbolReference(Label, /*ForceOffset*/ DD->useSplitDwarf()); +} + +LLVM_DUMP_METHOD +void DIELocList::print(raw_ostream &O) const { O << "LocList: " << Index; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp new file mode 100644 index 0000000..0201065 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp @@ -0,0 +1,515 @@ +//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "ByteStreamer.h"
+#include "DIEHash.h"
+#include "DwarfDebug.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DIE.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+/// \brief Grabs the string in whichever attribute is passed in and returns
+/// a reference to it.
+static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
+  // Iterate through all the attributes until we find the one we're
+  // looking for; if we can't find it, return an empty string.
+  for (const auto &V : Die.values())
+    if (V.getAttribute() == Attr)
+      return V.getDIEString().getString();
+
+  return StringRef("");
+}
+
+/// \brief Adds the string in \p Str to the hash. This also hashes
+/// a trailing NULL with the string.
+void DIEHash::addString(StringRef Str) {
+  DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+  Hash.update(Str);
+  Hash.update(makeArrayRef((uint8_t)'\0'));
+}
+
+// FIXME: The LEB128 routines are copied and only slightly modified out of
+// LEB128.h.
+
+/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+void DIEHash::addULEB128(uint64_t Value) {
+  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value != 0)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    Hash.update(Byte);
+  } while (Value != 0);
+}
+
+void DIEHash::addSLEB128(int64_t Value) {
+  DEBUG(dbgs() << "Adding SLEB128 " << Value << " to hash.\n");
+  bool More;
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    More = !((((Value == 0) && ((Byte & 0x40) == 0)) ||
+              ((Value == -1) && ((Byte & 0x40) != 0))));
+    if (More)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    Hash.update(Byte);
+  } while (More);
+}
+
+/// \brief Adds the context of \p Parent to the hash.
+void DIEHash::addParentContext(const DIE &Parent) {
+
+  DEBUG(dbgs() << "Adding parent context to hash...\n");
+
+  // [7.27.2] For each surrounding type or namespace beginning with the
+  // outermost such construct...
+  SmallVector<const DIE *, 1> Parents;
+  const DIE *Cur = &Parent;
+  while (Cur->getParent()) {
+    Parents.push_back(Cur);
+    Cur = Cur->getParent();
+  }
+  assert(Cur->getTag() == dwarf::DW_TAG_compile_unit ||
+         Cur->getTag() == dwarf::DW_TAG_type_unit);
+
+  // Reverse iterate over our list to go from the outermost construct to the
+  // innermost.
+  for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
+                                                      E = Parents.rend();
+       I != E; ++I) {
+    const DIE &Die = **I;
+
+    // ... Append the letter "C" to the sequence...
+    addULEB128('C');
+
+    // ... Followed by the DWARF tag of the construct...
+    addULEB128(Die.getTag());
+
+    // ... Then the name, taken from the DW_AT_name attribute.
+    StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+    DEBUG(dbgs() << "... adding context: " << Name << "\n");
+    if (!Name.empty())
+      addString(Name);
+  }
+}
+
+// Collect all of the attributes for a particular DIE in a single structure.
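+// Only attributes that participate in the hashing order of DWARF4 Section
+// 7.27.4 (mirrored by the DIEAttrs struct) are recorded; anything else is
+// dropped so that hashAttributes() can later walk them in that fixed order.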
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { +#define COLLECT_ATTR(NAME) \ + case dwarf::NAME: \ + Attrs.NAME = V; \ + break + + for (const auto &V : Die.values()) { + DEBUG(dbgs() << "Attribute: " + << dwarf::AttributeString(V.getAttribute()) + << " added.\n"); + switch (V.getAttribute()) { + COLLECT_ATTR(DW_AT_name); + COLLECT_ATTR(DW_AT_accessibility); + COLLECT_ATTR(DW_AT_address_class); + COLLECT_ATTR(DW_AT_allocated); + COLLECT_ATTR(DW_AT_artificial); + COLLECT_ATTR(DW_AT_associated); + COLLECT_ATTR(DW_AT_binary_scale); + COLLECT_ATTR(DW_AT_bit_offset); + COLLECT_ATTR(DW_AT_bit_size); + COLLECT_ATTR(DW_AT_bit_stride); + COLLECT_ATTR(DW_AT_byte_size); + COLLECT_ATTR(DW_AT_byte_stride); + COLLECT_ATTR(DW_AT_const_expr); + COLLECT_ATTR(DW_AT_const_value); + COLLECT_ATTR(DW_AT_containing_type); + COLLECT_ATTR(DW_AT_count); + COLLECT_ATTR(DW_AT_data_bit_offset); + COLLECT_ATTR(DW_AT_data_location); + COLLECT_ATTR(DW_AT_data_member_location); + COLLECT_ATTR(DW_AT_decimal_scale); + COLLECT_ATTR(DW_AT_decimal_sign); + COLLECT_ATTR(DW_AT_default_value); + COLLECT_ATTR(DW_AT_digit_count); + COLLECT_ATTR(DW_AT_discr); + COLLECT_ATTR(DW_AT_discr_list); + COLLECT_ATTR(DW_AT_discr_value); + COLLECT_ATTR(DW_AT_encoding); + COLLECT_ATTR(DW_AT_enum_class); + COLLECT_ATTR(DW_AT_endianity); + COLLECT_ATTR(DW_AT_explicit); + COLLECT_ATTR(DW_AT_is_optional); + COLLECT_ATTR(DW_AT_location); + COLLECT_ATTR(DW_AT_lower_bound); + COLLECT_ATTR(DW_AT_mutable); + COLLECT_ATTR(DW_AT_ordering); + COLLECT_ATTR(DW_AT_picture_string); + COLLECT_ATTR(DW_AT_prototyped); + COLLECT_ATTR(DW_AT_small); + COLLECT_ATTR(DW_AT_segment); + COLLECT_ATTR(DW_AT_string_length); + COLLECT_ATTR(DW_AT_threads_scaled); + COLLECT_ATTR(DW_AT_upper_bound); + COLLECT_ATTR(DW_AT_use_location); + COLLECT_ATTR(DW_AT_use_UTF8); + COLLECT_ATTR(DW_AT_variable_parameter); + COLLECT_ATTR(DW_AT_virtuality); + COLLECT_ATTR(DW_AT_visibility); + COLLECT_ATTR(DW_AT_vtable_elem_location); + COLLECT_ATTR(DW_AT_type); + default: + break; + } + } +} + +void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, + const DIE &Entry, StringRef Name) { + // append the letter 'N' + addULEB128('N'); + + // the DWARF attribute code (DW_AT_type or DW_AT_friend), + addULEB128(Attribute); + + // the context of the tag, + if (const DIE *Parent = Entry.getParent()) + addParentContext(*Parent); + + // the letter 'E', + addULEB128('E'); + + // and the name of the type. + addString(Name); + + // Currently DW_TAG_friends are not used by Clang, but if they do become so, + // here's the relevant spec text to implement: + // + // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, + // the context is omitted and the name to be used is the ABI-specific name + // of the subprogram (e.g., the mangled linker name). +} + +void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, + unsigned DieNumber) { + // a) If T is in the list of [previously hashed types], use the letter + // 'R' as the marker + addULEB128('R'); + + addULEB128(Attribute); + + // and use the unsigned LEB128 encoding of [the index of T in the + // list] as the attribute value; + addULEB128(DieNumber); +} + +void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, + const DIE &Entry) { + assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " + "tags. 
Add support here when there's "
+                                        "a use case");
+  // Step 5
+  // If the tag in Step 3 is one of [the below tags]
+  if ((Tag == dwarf::DW_TAG_pointer_type ||
+       Tag == dwarf::DW_TAG_reference_type ||
+       Tag == dwarf::DW_TAG_rvalue_reference_type ||
+       Tag == dwarf::DW_TAG_ptr_to_member_type) &&
+      // and the referenced type (via the [below attributes])
+      // FIXME: This seems overly restrictive, and causes hash mismatches when
+      // there's a decl/def difference in the containing type of a
+      // ptr_to_member_type, but it's what DWARF says, for some reason.
+      Attribute == dwarf::DW_AT_type) {
+    // ... has a DW_AT_name attribute,
+    StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
+    if (!Name.empty()) {
+      hashShallowTypeReference(Attribute, Entry, Name);
+      return;
+    }
+  }
+
+  unsigned &DieNumber = Numbering[&Entry];
+  if (DieNumber) {
+    hashRepeatedTypeReference(Attribute, DieNumber);
+    return;
+  }
+
+  // otherwise, b) use the letter 'T' as the marker, ...
+  addULEB128('T');
+
+  addULEB128(Attribute);
+
+  // ... process the type T recursively by performing Steps 2 through 7, and
+  // use the result as the attribute value.
+  DieNumber = Numbering.size();
+  computeHash(Entry);
+}
+
+// Hash all of the values in a block-like set of values. This assumes that
+// all of the data is going to be added as integers.
+void DIEHash::hashBlockData(const DIE::const_value_range &Values) {
+  for (const auto &V : Values)
+    Hash.update((uint64_t)V.getDIEInteger().getValue());
+}
+
+// Hash the contents of a loclistptr class.
+void DIEHash::hashLocList(const DIELocList &LocList) {
+  HashingByteStreamer Streamer(*this);
+  DwarfDebug &DD = *AP->getDwarfDebug();
+  const DebugLocStream &Locs = DD.getDebugLocs();
+  for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
+    DD.emitDebugLocEntry(Streamer, Entry);
+}
+
+// Hash an individual attribute \param Attr based on the type of attribute and
+// the form.
+void DIEHash::hashAttribute(DIEValue Value, dwarf::Tag Tag) {
+  dwarf::Attribute Attribute = Value.getAttribute();
+
+  // Other attribute values use the letter 'A' as the marker, and the value
+  // consists of the form code (encoded as an unsigned LEB128 value) followed by
+  // the encoding of the value according to the form code. To ensure
+  // reproducibility of the signature, the set of forms used in the signature
+  // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag,
+  // DW_FORM_string, and DW_FORM_block.
+
+  switch (Value.getType()) {
+  case DIEValue::isNone:
+    llvm_unreachable("Expected valid DIEValue");
+
+  // 7.27 Step 3
+  // ... An attribute that refers to another type entry T is processed as
+  // follows:
+  case DIEValue::isEntry:
+    hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry());
+    break;
+  case DIEValue::isInteger: {
+    addULEB128('A');
+    addULEB128(Attribute);
+    switch (Value.getForm()) {
+    case dwarf::DW_FORM_data1:
+    case dwarf::DW_FORM_data2:
+    case dwarf::DW_FORM_data4:
+    case dwarf::DW_FORM_data8:
+    case dwarf::DW_FORM_udata:
+    case dwarf::DW_FORM_sdata:
+      addULEB128(dwarf::DW_FORM_sdata);
+      addSLEB128((int64_t)Value.getDIEInteger().getValue());
+      break;
+    // DW_FORM_flag_present is just a flag with a value of one. We still give
+    // it a value so just use the value.
+ case dwarf::DW_FORM_flag_present: + case dwarf::DW_FORM_flag: + addULEB128(dwarf::DW_FORM_flag); + addULEB128((int64_t)Value.getDIEInteger().getValue()); + break; + default: + llvm_unreachable("Unknown integer form!"); + } + break; + } + case DIEValue::isString: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_string); + addString(Value.getDIEString().getString()); + break; + case DIEValue::isBlock: + case DIEValue::isLoc: + case DIEValue::isLocList: + addULEB128('A'); + addULEB128(Attribute); + addULEB128(dwarf::DW_FORM_block); + if (Value.getType() == DIEValue::isBlock) { + addULEB128(Value.getDIEBlock().ComputeSize(AP)); + hashBlockData(Value.getDIEBlock().values()); + } else if (Value.getType() == DIEValue::isLoc) { + addULEB128(Value.getDIELoc().ComputeSize(AP)); + hashBlockData(Value.getDIELoc().values()); + } else { + // We could add the block length, but that would take + // a bit of work and not add a lot of uniqueness + // to the hash in some way we could test. + hashLocList(Value.getDIELocList()); + } + break; + // FIXME: It's uncertain whether or not we should handle this at the moment. + case DIEValue::isExpr: + case DIEValue::isLabel: + case DIEValue::isDelta: + case DIEValue::isTypeSignature: + llvm_unreachable("Add support for additional value types."); + } +} + +// Go through the attributes from \param Attrs in the order specified in 7.27.4 +// and hash them. +void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { +#define ADD_ATTR(ATTR) \ + { \ + if (ATTR) \ + hashAttribute(ATTR, Tag); \ + } + + ADD_ATTR(Attrs.DW_AT_name); + ADD_ATTR(Attrs.DW_AT_accessibility); + ADD_ATTR(Attrs.DW_AT_address_class); + ADD_ATTR(Attrs.DW_AT_allocated); + ADD_ATTR(Attrs.DW_AT_artificial); + ADD_ATTR(Attrs.DW_AT_associated); + ADD_ATTR(Attrs.DW_AT_binary_scale); + ADD_ATTR(Attrs.DW_AT_bit_offset); + ADD_ATTR(Attrs.DW_AT_bit_size); + ADD_ATTR(Attrs.DW_AT_bit_stride); + ADD_ATTR(Attrs.DW_AT_byte_size); + ADD_ATTR(Attrs.DW_AT_byte_stride); + ADD_ATTR(Attrs.DW_AT_const_expr); + ADD_ATTR(Attrs.DW_AT_const_value); + ADD_ATTR(Attrs.DW_AT_containing_type); + ADD_ATTR(Attrs.DW_AT_count); + ADD_ATTR(Attrs.DW_AT_data_bit_offset); + ADD_ATTR(Attrs.DW_AT_data_location); + ADD_ATTR(Attrs.DW_AT_data_member_location); + ADD_ATTR(Attrs.DW_AT_decimal_scale); + ADD_ATTR(Attrs.DW_AT_decimal_sign); + ADD_ATTR(Attrs.DW_AT_default_value); + ADD_ATTR(Attrs.DW_AT_digit_count); + ADD_ATTR(Attrs.DW_AT_discr); + ADD_ATTR(Attrs.DW_AT_discr_list); + ADD_ATTR(Attrs.DW_AT_discr_value); + ADD_ATTR(Attrs.DW_AT_encoding); + ADD_ATTR(Attrs.DW_AT_enum_class); + ADD_ATTR(Attrs.DW_AT_endianity); + ADD_ATTR(Attrs.DW_AT_explicit); + ADD_ATTR(Attrs.DW_AT_is_optional); + ADD_ATTR(Attrs.DW_AT_location); + ADD_ATTR(Attrs.DW_AT_lower_bound); + ADD_ATTR(Attrs.DW_AT_mutable); + ADD_ATTR(Attrs.DW_AT_ordering); + ADD_ATTR(Attrs.DW_AT_picture_string); + ADD_ATTR(Attrs.DW_AT_prototyped); + ADD_ATTR(Attrs.DW_AT_small); + ADD_ATTR(Attrs.DW_AT_segment); + ADD_ATTR(Attrs.DW_AT_string_length); + ADD_ATTR(Attrs.DW_AT_threads_scaled); + ADD_ATTR(Attrs.DW_AT_upper_bound); + ADD_ATTR(Attrs.DW_AT_use_location); + ADD_ATTR(Attrs.DW_AT_use_UTF8); + ADD_ATTR(Attrs.DW_AT_variable_parameter); + ADD_ATTR(Attrs.DW_AT_virtuality); + ADD_ATTR(Attrs.DW_AT_visibility); + ADD_ATTR(Attrs.DW_AT_vtable_elem_location); + ADD_ATTR(Attrs.DW_AT_type); + + // FIXME: Add the extended attributes. +} + +// Add all of the attributes for \param Die to the hash. 
+void DIEHash::addAttributes(const DIE &Die) { + DIEAttrs Attrs = {}; + collectAttributes(Die, Attrs); + hashAttributes(Attrs, Die.getTag()); +} + +void DIEHash::hashNestedType(const DIE &Die, StringRef Name) { + // 7.27 Step 7 + // ... append the letter 'S', + addULEB128('S'); + + // the tag of C, + addULEB128(Die.getTag()); + + // and the name. + addString(Name); +} + +// Compute the hash of a DIE. This is based on the type signature computation +// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a +// flattened description of the DIE. +void DIEHash::computeHash(const DIE &Die) { + // Append the letter 'D', followed by the DWARF tag of the DIE. + addULEB128('D'); + addULEB128(Die.getTag()); + + // Add each of the attributes of the DIE. + addAttributes(Die); + + // Then hash each of the children of the DIE. + for (auto &C : Die.children()) { + // 7.27 Step 7 + // If C is a nested type entry or a member function entry, ... + if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) { + StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name); + // ... and has a DW_AT_name attribute + if (!Name.empty()) { + hashNestedType(C, Name); + continue; + } + } + computeHash(C); + } + + // Following the last (or if there are no children), append a zero byte. + Hash.update(makeArrayRef((uint8_t)'\0')); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of the full CU and all top level CU entities. +// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. +uint64_t DIEHash::computeCUSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return support::endian::read64le(Result + 8); +} + +/// This is based on the type signature computation given in section 7.27 of the +/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE +/// with the inclusion of additional forms not specifically called out in the +/// standard. +uint64_t DIEHash::computeTypeSignature(const DIE &Die) { + Numbering.clear(); + Numbering[&Die] = 1; + + if (const DIE *Parent = Die.getParent()) + addParentContext(*Parent); + + // Hash the DIE. + computeHash(Die); + + // Now return the result. + MD5::MD5Result Result; + Hash.final(Result); + + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + return support::endian::read64le(Result + 8); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h new file mode 100644 index 0000000..44f0ce8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h @@ -0,0 +1,159 @@ +//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for DWARF4 hashing of DIEs. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DIEHASH_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/Support/MD5.h" + +namespace llvm { + +class AsmPrinter; +class CompileUnit; + +/// \brief An object containing the capability of hashing and adding hash +/// attributes onto a DIE. +class DIEHash { + // Collection of all attributes used in hashing a particular DIE. + struct DIEAttrs { + DIEValue DW_AT_name; + DIEValue DW_AT_accessibility; + DIEValue DW_AT_address_class; + DIEValue DW_AT_allocated; + DIEValue DW_AT_artificial; + DIEValue DW_AT_associated; + DIEValue DW_AT_binary_scale; + DIEValue DW_AT_bit_offset; + DIEValue DW_AT_bit_size; + DIEValue DW_AT_bit_stride; + DIEValue DW_AT_byte_size; + DIEValue DW_AT_byte_stride; + DIEValue DW_AT_const_expr; + DIEValue DW_AT_const_value; + DIEValue DW_AT_containing_type; + DIEValue DW_AT_count; + DIEValue DW_AT_data_bit_offset; + DIEValue DW_AT_data_location; + DIEValue DW_AT_data_member_location; + DIEValue DW_AT_decimal_scale; + DIEValue DW_AT_decimal_sign; + DIEValue DW_AT_default_value; + DIEValue DW_AT_digit_count; + DIEValue DW_AT_discr; + DIEValue DW_AT_discr_list; + DIEValue DW_AT_discr_value; + DIEValue DW_AT_encoding; + DIEValue DW_AT_enum_class; + DIEValue DW_AT_endianity; + DIEValue DW_AT_explicit; + DIEValue DW_AT_is_optional; + DIEValue DW_AT_location; + DIEValue DW_AT_lower_bound; + DIEValue DW_AT_mutable; + DIEValue DW_AT_ordering; + DIEValue DW_AT_picture_string; + DIEValue DW_AT_prototyped; + DIEValue DW_AT_small; + DIEValue DW_AT_segment; + DIEValue DW_AT_string_length; + DIEValue DW_AT_threads_scaled; + DIEValue DW_AT_upper_bound; + DIEValue DW_AT_use_location; + DIEValue DW_AT_use_UTF8; + DIEValue DW_AT_variable_parameter; + DIEValue DW_AT_virtuality; + DIEValue DW_AT_visibility; + DIEValue DW_AT_vtable_elem_location; + DIEValue DW_AT_type; + + // Insert any additional ones here... + }; + +public: + DIEHash(AsmPrinter *A = nullptr) : AP(A) {} + + /// \brief Computes the CU signature. + uint64_t computeCUSignature(const DIE &Die); + + /// \brief Computes the type signature. + uint64_t computeTypeSignature(const DIE &Die); + + // Helper routines to process parts of a DIE. +private: + /// \brief Adds the parent context of \param Die to the hash. + void addParentContext(const DIE &Die); + + /// \brief Adds the attributes of \param Die to the hash. + void addAttributes(const DIE &Die); + + /// \brief Computes the full DWARF4 7.27 hash of the DIE. + void computeHash(const DIE &Die); + + // Routines that add DIEValues to the hash. +public: + /// \brief Adds \param Value to the hash. + void update(uint8_t Value) { Hash.update(Value); } + + /// \brief Encodes and adds \param Value to the hash as a ULEB128. + void addULEB128(uint64_t Value); + + /// \brief Encodes and adds \param Value to the hash as a SLEB128. + void addSLEB128(int64_t Value); + +private: + /// \brief Adds \param Str to the hash and includes a NULL byte. + void addString(StringRef Str); + + /// \brief Collects the attributes of DIE \param Die into the \param Attrs + /// structure. + void collectAttributes(const DIE &Die, DIEAttrs &Attrs); + + /// \brief Hashes the attributes in \param Attrs in order. + void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag); + + /// \brief Hashes the data in a block like DIEValue, e.g. DW_FORM_block or + /// DW_FORM_exprloc. 
+  void hashBlockData(const DIE::const_value_range &Values);
+
+  /// \brief Hashes the contents pointed to in the .debug_loc section.
+  void hashLocList(const DIELocList &LocList);
+
+  /// \brief Hashes an individual attribute.
+  void hashAttribute(DIEValue Value, dwarf::Tag Tag);
+
+  /// \brief Hashes an attribute that refers to another DIE.
+  void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+                    const DIE &Entry);
+
+  /// \brief Hashes a reference to a named type in such a way that it is
+  /// independent of whether that type is described by a declaration or a
+  /// definition.
+  void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+                                StringRef Name);
+
+  /// \brief Hashes a reference to a previously referenced type DIE.
+  void hashRepeatedTypeReference(dwarf::Attribute Attribute,
+                                 unsigned DieNumber);
+
+  void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+  MD5 Hash;
+  AsmPrinter *AP;
+  DenseMap<const DIE *, unsigned> Numbering;
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
new file mode 100644
index 0000000..3c46a99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -0,0 +1,232 @@
+//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp -------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DbgValueHistoryCalculator.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <map>
+using namespace llvm;
+
+#define DEBUG_TYPE "dwarfdebug"
+
+// \brief If @MI is a DBG_VALUE with a debug value described by a
+// defined register, returns the number of this register.
+// Otherwise, returns 0.
+static unsigned isDescribedByReg(const MachineInstr &MI) {
+  assert(MI.isDebugValue());
+  assert(MI.getNumOperands() == 4);
+  // If the location of the variable is described using a register (directly
+  // or indirectly), this register is always the first operand.
+  return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+}
+
+void DbgValueHistoryMap::startInstrRange(InlinedVariable Var,
+                                         const MachineInstr &MI) {
+  // Instruction range should start with a DBG_VALUE instruction for the
+  // variable.
+  assert(MI.isDebugValue() && "not a DBG_VALUE");
+  auto &Ranges = VarInstrRanges[Var];
+  if (!Ranges.empty() && Ranges.back().second == nullptr &&
+      Ranges.back().first->isIdenticalTo(&MI)) {
+    DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
+                 << "\t" << Ranges.back().first << "\t" << MI << "\n");
+    return;
+  }
+  Ranges.push_back(std::make_pair(&MI, nullptr));
+}
+
+void DbgValueHistoryMap::endInstrRange(InlinedVariable Var,
+                                       const MachineInstr &MI) {
+  auto &Ranges = VarInstrRanges[Var];
+  // Verify that the current instruction range is not yet closed.
+  assert(!Ranges.empty() && Ranges.back().second == nullptr);
+  // For now, instruction ranges are not allowed to cross basic block
+  // boundaries.
+ assert(Ranges.back().first->getParent() == MI.getParent()); + Ranges.back().second = &MI; +} + +unsigned DbgValueHistoryMap::getRegisterForVar(InlinedVariable Var) const { + const auto &I = VarInstrRanges.find(Var); + if (I == VarInstrRanges.end()) + return 0; + const auto &Ranges = I->second; + if (Ranges.empty() || Ranges.back().second != nullptr) + return 0; + return isDescribedByReg(*Ranges.back().first); +} + +namespace { +// Maps physreg numbers to the variables they describe. +typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; +typedef std::map<unsigned, SmallVector<InlinedVariable, 1>> RegDescribedVarsMap; +} + +// \brief Claim that @Var is not described by @RegNo anymore. +static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { + const auto &I = RegVars.find(RegNo); + assert(RegNo != 0U && I != RegVars.end()); + auto &VarSet = I->second; + const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var); + assert(VarPos != VarSet.end()); + VarSet.erase(VarPos); + // Don't keep empty sets in a map to keep it as small as possible. + if (VarSet.empty()) + RegVars.erase(I); +} + +// \brief Claim that @Var is now described by @RegNo. +static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo, + InlinedVariable Var) { + assert(RegNo != 0U); + auto &VarSet = RegVars[RegNo]; + assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end()); + VarSet.push_back(Var); +} + +// \brief Terminate the location range for variables described by register at +// @I by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, + RegDescribedVarsMap::iterator I, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + // Iterate over all variables described by this register and add this + // instruction to their history, clobbering it. + for (const auto &Var : I->second) + HistMap.endInstrRange(Var, ClobberingInstr); + RegVars.erase(I); +} + +// \brief Terminate the location range for variables described by register +// @RegNo by inserting @ClobberingInstr to their history. +static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, + DbgValueHistoryMap &HistMap, + const MachineInstr &ClobberingInstr) { + const auto &I = RegVars.find(RegNo); + if (I == RegVars.end()) + return; + clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr); +} + +// \brief Collect all registers clobbered by @MI and apply the functor +// @Func to their RegNo. +// @Func should be a functor with a void(unsigned) signature. We're +// not using std::function here for performance reasons. It has a +// small but measurable impact. By using a functor instead of a +// std::set& here, we can avoid the overhead of constructing +// temporaries in calculateDbgValueHistory, which has a significant +// performance impact. +template<typename Callable> +static void applyToClobberedRegisters(const MachineInstr &MI, + const TargetRegisterInfo *TRI, + Callable Func) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.isDef() || !MO.getReg()) + continue; + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Func(*AI); + } +} + +// \brief Returns the first instruction in @MBB which corresponds to +// the function epilogue, or nullptr if @MBB doesn't contain an epilogue. 
+static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { + auto LastMI = MBB.getLastNonDebugInstr(); + if (LastMI == MBB.end() || !LastMI->isReturn()) + return nullptr; + // Assume that epilogue starts with instruction having the same debug location + // as the return instruction. + DebugLoc LastLoc = LastMI->getDebugLoc(); + auto Res = LastMI; + for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)), + E = MBB.rend(); + I != E; ++I) { + if (I->getDebugLoc() != LastLoc) + return Res; + Res = &*I; + } + // If all instructions have the same debug location, assume whole MBB is + // an epilogue. + return MBB.begin(); +} + +// \brief Collect registers that are modified in the function body (their +// contents is changed outside of the prologue and epilogue). +static void collectChangingRegs(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + BitVector &Regs) { + for (const auto &MBB : *MF) { + auto FirstEpilogueInst = getFirstEpilogueInst(MBB); + + for (const auto &MI : MBB) { + if (&MI == FirstEpilogueInst) + break; + if (!MI.getFlag(MachineInstr::FrameSetup)) + applyToClobberedRegisters(MI, TRI, [&](unsigned r) { Regs.set(r); }); + } + } +} + +void llvm::calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result) { + BitVector ChangingRegs(TRI->getNumRegs()); + collectChangingRegs(MF, TRI, ChangingRegs); + + RegDescribedVarsMap RegVars; + for (const auto &MBB : *MF) { + for (const auto &MI : MBB) { + if (!MI.isDebugValue()) { + // Not a DBG_VALUE instruction. It may clobber registers which describe + // some variables. + applyToClobberedRegisters(MI, TRI, [&](unsigned RegNo) { + if (ChangingRegs.test(RegNo)) + clobberRegisterUses(RegVars, RegNo, Result, MI); + }); + continue; + } + + assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!"); + // Use the base variable (without any DW_OP_piece expressions) + // as index into History. The full variables including the + // piece expressions are attached to the MI. + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + InlinedVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + if (unsigned PrevReg = Result.getRegisterForVar(Var)) + dropRegDescribedVar(RegVars, PrevReg, Var); + + Result.startInstrRange(Var, MI); + + if (unsigned NewReg = isDescribedByReg(MI)) + addRegDescribedVar(RegVars, NewReg, Var); + } + + // Make sure locations for register-described variables are valid only + // until the end of the basic block (unless it's the last basic block, in + // which case let their liveness run off to the end of the function). + if (!MBB.empty() && &MBB != &MF->back()) { + for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) { + auto CurElem = I++; // CurElem can be erased below. + if (ChangingRegs.test(CurElem->first)) + clobberRegisterUses(RegVars, CurElem, Result, MBB.back()); + } + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h new file mode 100644 index 0000000..546d1b4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h @@ -0,0 +1,60 @@ +//===-- llvm/CodeGen/AsmPrinter/DbgValueHistoryCalculator.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DBGVALUEHISTORYCALCULATOR_H + +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class MachineFunction; +class MachineInstr; +class DILocalVariable; +class DILocation; +class TargetRegisterInfo; + +// For each user variable, keep a list of instruction ranges where this variable +// is accessible. The variables are listed in order of appearance. +class DbgValueHistoryMap { + // Each instruction range starts with a DBG_VALUE instruction, specifying the + // location of a variable, which is assumed to be valid until the end of the + // range. If end is not specified, location is valid until the start + // instruction of the next instruction range, or until the end of the + // function. +public: + typedef std::pair<const MachineInstr *, const MachineInstr *> InstrRange; + typedef SmallVector<InstrRange, 4> InstrRanges; + typedef std::pair<const DILocalVariable *, const DILocation *> + InlinedVariable; + typedef MapVector<InlinedVariable, InstrRanges> InstrRangesMap; + +private: + InstrRangesMap VarInstrRanges; + +public: + void startInstrRange(InlinedVariable Var, const MachineInstr &MI); + void endInstrRange(InlinedVariable Var, const MachineInstr &MI); + // Returns register currently describing @Var. If @Var is currently + // unaccessible or is not described by a register, returns 0. + unsigned getRegisterForVar(InlinedVariable Var) const; + + bool empty() const { return VarInstrRanges.empty(); } + void clear() { VarInstrRanges.clear(); } + InstrRangesMap::const_iterator begin() const { return VarInstrRanges.begin(); } + InstrRangesMap::const_iterator end() const { return VarInstrRanges.end(); } +}; + +void calculateDbgValueHistory(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + DbgValueHistoryMap &Result); +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h new file mode 100644 index 0000000..bbe5324 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h @@ -0,0 +1,181 @@ +//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCENTRY_H + +#include "DebugLocStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" + +namespace llvm { +class AsmPrinter; + +/// \brief This struct describes location entries emitted in the .debug_loc +/// section. +class DebugLocEntry { + /// Begin and end symbols for the address range that this location is valid. + const MCSymbol *Begin; + const MCSymbol *End; + +public: + /// \brief A single location or constant. 
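+  /// Each Value pairs a DIExpression with either a location in the machine
+  /// frame or one of the supported constant forms (plain integer, ConstantFP,
+  /// or ConstantInt).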
+ struct Value { + Value(const DIExpression *Expr, int64_t i) + : Expression(Expr), EntryKind(E_Integer) { + Constant.Int = i; + } + Value(const DIExpression *Expr, const ConstantFP *CFP) + : Expression(Expr), EntryKind(E_ConstantFP) { + Constant.CFP = CFP; + } + Value(const DIExpression *Expr, const ConstantInt *CIP) + : Expression(Expr), EntryKind(E_ConstantInt) { + Constant.CIP = CIP; + } + Value(const DIExpression *Expr, MachineLocation Loc) + : Expression(Expr), EntryKind(E_Location), Loc(Loc) { + assert(cast<DIExpression>(Expr)->isValid()); + } + + /// Any complex address location expression for this Value. + const DIExpression *Expression; + + /// Type of entry that this represents. + enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt }; + enum EntryType EntryKind; + + /// Either a constant, + union { + int64_t Int; + const ConstantFP *CFP; + const ConstantInt *CIP; + } Constant; + + // Or a location in the machine frame. + MachineLocation Loc; + + bool isLocation() const { return EntryKind == E_Location; } + bool isInt() const { return EntryKind == E_Integer; } + bool isConstantFP() const { return EntryKind == E_ConstantFP; } + bool isConstantInt() const { return EntryKind == E_ConstantInt; } + int64_t getInt() const { return Constant.Int; } + const ConstantFP *getConstantFP() const { return Constant.CFP; } + const ConstantInt *getConstantInt() const { return Constant.CIP; } + MachineLocation getLoc() const { return Loc; } + bool isBitPiece() const { return getExpression()->isBitPiece(); } + const DIExpression *getExpression() const { return Expression; } + friend bool operator==(const Value &, const Value &); + friend bool operator<(const Value &, const Value &); + }; + +private: + /// A nonempty list of locations/constants belonging to this entry, + /// sorted by offset. + SmallVector<Value, 1> Values; + +public: + DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val) + : Begin(B), End(E) { + Values.push_back(std::move(Val)); + } + + /// \brief If this and Next are describing different pieces of the same + /// variable, merge them by appending Next's values to the current + /// list of values. + /// Return true if the merge was successful. + bool MergeValues(const DebugLocEntry &Next) { + if (Begin == Next.Begin) { + auto *Expr = cast_or_null<DIExpression>(Values[0].Expression); + auto *NextExpr = cast_or_null<DIExpression>(Next.Values[0].Expression); + if (Expr->isBitPiece() && NextExpr->isBitPiece()) { + addValues(Next.Values); + End = Next.End; + return true; + } + } + return false; + } + + /// \brief Attempt to merge this DebugLocEntry with Next and return + /// true if the merge was successful. Entries can be merged if they + /// share the same Loc/Constant and if Next immediately follows this + /// Entry. + bool MergeRanges(const DebugLocEntry &Next) { + // If this and Next are describing the same variable, merge them. + if ((End == Next.Begin && Values == Next.Values)) { + End = Next.End; + return true; + } + return false; + } + + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + ArrayRef<Value> getValues() const { return Values; } + void addValues(ArrayRef<DebugLocEntry::Value> Vals) { + Values.append(Vals.begin(), Vals.end()); + sortUniqueValues(); + assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){ + return V.isBitPiece(); + }) && "value must be a piece"); + } + + // \brief Sort the pieces by offset. 
+ // Remove any duplicate entries by dropping all but the first. + void sortUniqueValues() { + std::sort(Values.begin(), Values.end()); + Values.erase( + std::unique( + Values.begin(), Values.end(), [](const Value &A, const Value &B) { + return A.getExpression() == B.getExpression(); + }), + Values.end()); + } + + /// \brief Lower this entry into a DWARF expression. + void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List, + const DIBasicType *BT); +}; + +/// \brief Compare two Values for equality. +inline bool operator==(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + if (A.EntryKind != B.EntryKind) + return false; + + if (A.Expression != B.Expression) + return false; + + switch (A.EntryKind) { + case DebugLocEntry::Value::E_Location: + return A.Loc == B.Loc; + case DebugLocEntry::Value::E_Integer: + return A.Constant.Int == B.Constant.Int; + case DebugLocEntry::Value::E_ConstantFP: + return A.Constant.CFP == B.Constant.CFP; + case DebugLocEntry::Value::E_ConstantInt: + return A.Constant.CIP == B.Constant.CIP; + } + llvm_unreachable("unhandled EntryKind"); +} + +/// \brief Compare two pieces based on their offset. +inline bool operator<(const DebugLocEntry::Value &A, + const DebugLocEntry::Value &B) { + return A.getExpression()->getBitPieceOffset() < + B.getExpression()->getBitPieceOffset(); +} + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp new file mode 100644 index 0000000..7e8ed71 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.cpp @@ -0,0 +1,46 @@ +//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DebugLocStream.h" +#include "DwarfDebug.h" +#include "llvm/CodeGen/AsmPrinter.h" + +using namespace llvm; + +bool DebugLocStream::finalizeList(AsmPrinter &Asm) { + if (Lists.back().EntryOffset == Entries.size()) { + // Empty list. Delete it. + Lists.pop_back(); + return false; + } + + // Real list. Generate a label for it. + Lists.back().Label = Asm.createTempSymbol("debug_loc"); + return true; +} + +void DebugLocStream::finalizeEntry() { + if (Entries.back().ByteOffset != DWARFBytes.size()) + return; + + // The last entry was empty. Delete it. + Comments.erase(Comments.begin() + Entries.back().CommentOffset, + Comments.end()); + Entries.pop_back(); + + assert(Lists.back().EntryOffset <= Entries.size() && + "Popped off more entries than are in the list"); +} + +DebugLocStream::ListBuilder::~ListBuilder() { + if (!Locs.finalizeList(Asm)) + return; + V.initializeDbgValue(&MI); + V.setDebugLocListIndex(ListIndex); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h new file mode 100644 index 0000000..3656e9d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocStream.h @@ -0,0 +1,193 @@ +//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DEBUGLOCSTREAM_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "ByteStreamer.h" + +namespace llvm { + +class AsmPrinter; +class DbgVariable; +class DwarfCompileUnit; +class MachineInstr; +class MCSymbol; + +/// \brief Byte stream of .debug_loc entries. +/// +/// Stores a unified stream of .debug_loc entries. There's \a List for each +/// variable/inlined-at pair, and an \a Entry for each \a DebugLocEntry. +/// +/// FIXME: Do we need all these temp symbols? +/// FIXME: Why not output directly to the output stream? +class DebugLocStream { +public: + struct List { + DwarfCompileUnit *CU; + MCSymbol *Label = nullptr; + size_t EntryOffset; + List(DwarfCompileUnit *CU, size_t EntryOffset) + : CU(CU), EntryOffset(EntryOffset) {} + }; + struct Entry { + const MCSymbol *BeginSym; + const MCSymbol *EndSym; + size_t ByteOffset; + size_t CommentOffset; + Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset, + size_t CommentOffset) + : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset), + CommentOffset(CommentOffset) {} + }; + +private: + SmallVector<List, 4> Lists; + SmallVector<Entry, 32> Entries; + SmallString<256> DWARFBytes; + SmallVector<std::string, 32> Comments; + + /// \brief Only verbose textual output needs comments. This will be set to + /// true for that case, and false otherwise. + bool GenerateComments; + +public: + DebugLocStream(bool GenerateComments) : GenerateComments(GenerateComments) { } + size_t getNumLists() const { return Lists.size(); } + const List &getList(size_t LI) const { return Lists[LI]; } + ArrayRef<List> getLists() const { return Lists; } + + class ListBuilder; + class EntryBuilder; + +private: + /// \brief Start a new .debug_loc entry list. + /// + /// Start a new .debug_loc entry list. Return the new list's index so it can + /// be retrieved later via \a getList(). + /// + /// Until the next call, \a startEntry() will add entries to this list. + size_t startList(DwarfCompileUnit *CU) { + size_t LI = Lists.size(); + Lists.emplace_back(CU, Entries.size()); + return LI; + } + + /// Finalize a .debug_loc entry list. + /// + /// If there are no entries in this list, delete it outright. Otherwise, + /// create a label with \a Asm. + /// + /// \return false iff the list is deleted. + bool finalizeList(AsmPrinter &Asm); + + /// \brief Start a new .debug_loc entry. + /// + /// Until the next call, bytes added to the stream will be added to this + /// entry. + void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) { + Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size()); + } + + /// Finalize a .debug_loc entry, deleting if it's empty. 
+ void finalizeEntry(); + +public: + BufferByteStreamer getStreamer() { + return BufferByteStreamer(DWARFBytes, Comments, GenerateComments); + } + + ArrayRef<Entry> getEntries(const List &L) const { + size_t LI = getIndex(L); + return makeArrayRef(Entries) + .slice(Lists[LI].EntryOffset, getNumEntries(LI)); + } + + ArrayRef<char> getBytes(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(DWARFBytes.begin(), DWARFBytes.end()) + .slice(Entries[EI].ByteOffset, getNumBytes(EI)); + } + ArrayRef<std::string> getComments(const Entry &E) const { + size_t EI = getIndex(E); + return makeArrayRef(Comments) + .slice(Entries[EI].CommentOffset, getNumComments(EI)); + } + +private: + size_t getIndex(const List &L) const { + assert(&Lists.front() <= &L && &L <= &Lists.back() && + "Expected valid list"); + return &L - &Lists.front(); + } + size_t getIndex(const Entry &E) const { + assert(&Entries.front() <= &E && &E <= &Entries.back() && + "Expected valid entry"); + return &E - &Entries.front(); + } + size_t getNumEntries(size_t LI) const { + if (LI + 1 == Lists.size()) + return Entries.size() - Lists[LI].EntryOffset; + return Lists[LI + 1].EntryOffset - Lists[LI].EntryOffset; + } + size_t getNumBytes(size_t EI) const { + if (EI + 1 == Entries.size()) + return DWARFBytes.size() - Entries[EI].ByteOffset; + return Entries[EI + 1].ByteOffset - Entries[EI].ByteOffset; + } + size_t getNumComments(size_t EI) const { + if (EI + 1 == Entries.size()) + return Comments.size() - Entries[EI].CommentOffset; + return Entries[EI + 1].CommentOffset - Entries[EI].CommentOffset; + } +}; + +/// Builder for DebugLocStream lists. +class DebugLocStream::ListBuilder { + DebugLocStream &Locs; + AsmPrinter &Asm; + DbgVariable &V; + const MachineInstr &MI; + size_t ListIndex; + +public: + ListBuilder(DebugLocStream &Locs, DwarfCompileUnit &CU, AsmPrinter &Asm, + DbgVariable &V, const MachineInstr &MI) + : Locs(Locs), Asm(Asm), V(V), MI(MI), ListIndex(Locs.startList(&CU)) {} + + /// Finalize the list. + /// + /// If the list is empty, delete it. Otherwise, finalize it by creating a + /// temp symbol in \a Asm and setting up the \a DbgVariable. + ~ListBuilder(); + + DebugLocStream &getLocs() { return Locs; } +}; + +/// Builder for DebugLocStream entries. +class DebugLocStream::EntryBuilder { + DebugLocStream &Locs; + +public: + EntryBuilder(ListBuilder &List, const MCSymbol *Begin, const MCSymbol *End) + : Locs(List.getLocs()) { + Locs.startEntry(Begin, End); + } + + /// Finalize the entry, deleting it if it's empty. + ~EntryBuilder() { Locs.finalizeEntry(); } + + BufferByteStreamer getStreamer() { return Locs.getStreamer(); } +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 0000000..4ad3e18 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,289 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfAccelTable.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +// The length of the header data is always going to be 4 + 4 + 4*NumAtoms. +DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) + : Header(8 + (atomList.size() * 4)), HeaderData(atomList), + Entries(Allocator) {} + +void DwarfAccelTable::AddName(DwarfStringPoolEntryRef Name, const DIE *die, + char Flags) { + assert(Data.empty() && "Already finalized!"); + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DataArray &DIEs = Entries[Name.getString()]; + assert(!DIEs.Name || DIEs.Name == Name); + DIEs.Name = Name; + DIEs.Values.push_back(new (Allocator) HashDataContents(die, Flags)); +} + +void DwarfAccelTable::ComputeBucketCount() { + // First get the number of unique hashes. + std::vector<uint32_t> uniques(Data.size()); + for (size_t i = 0, e = Data.size(); i < e; ++i) + uniques[i] = Data[i]->HashValue; + array_pod_sort(uniques.begin(), uniques.end()); + std::vector<uint32_t>::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) + Header.bucket_count = num / 4; + else if (num > 16) + Header.bucket_count = num / 2; + else + Header.bucket_count = num > 0 ? num : 1; + + Header.hashes_count = num; +} + +// compareDIEs - comparison predicate that sorts DIEs by their offset. +static bool compareDIEs(const DwarfAccelTable::HashDataContents *A, + const DwarfAccelTable::HashDataContents *B) { + return A->Die->getOffset() < B->Die->getOffset(); +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) { + // Create the individual hash data outputs. + Data.reserve(Entries.size()); + for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end(); + EI != EE; ++EI) { + + // Unique the entries. + std::stable_sort(EI->second.Values.begin(), EI->second.Values.end(), compareDIEs); + EI->second.Values.erase( + std::unique(EI->second.Values.begin(), EI->second.Values.end()), + EI->second.Values.end()); + + HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0, e = Data.size(); i < e; ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->createTempSymbol(Prefix); + } + + // Sort the contents of the buckets by hash value so that hash + // collisions end up together. Stable sort makes testing easier and + // doesn't cost much more. 
+ for (size_t i = 0; i < Buckets.size(); ++i) + std::stable_sort(Buckets[i].begin(), Buckets[i].end(), + [] (HashData *LHS, HashData *RHS) { + return LHS->HashValue < RHS->HashValue; + }); +} + +// Emits the header for the table via the AsmPrinter. +void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer->AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer->AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer->AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer->AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer->AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer->AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer->AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer->AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer->AddComment(dwarf::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer->AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. Each index is +// an offset into the list of hashes. +void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + Asm->OutStreamer->AddComment("Bucket " + Twine(i)); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + // Buckets point in the list of hashes, not to the data. Do not + // increment the index multiple times in case of hash collisions. + uint64_t PrevHash = UINT64_MAX; + for (auto *HD : Buckets[i]) { + uint32_t HashValue = HD->HashValue; + if (PrevHash != HashValue) + ++index; + PrevHash = HashValue; + } + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. +void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); + HI != HE; ++HI) { + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + Asm->OutStreamer->AddComment("Hash in Bucket " + Twine(i)); + Asm->EmitInt32(HashValue); + PrevHash = HashValue; + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. 
+void DwarfAccelTable::emitOffsets(AsmPrinter *Asm, const MCSymbol *SecBegin) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); + HI != HE; ++HI) { + uint32_t HashValue = (*HI)->HashValue; + if (PrevHash == HashValue) + continue; + PrevHash = HashValue; + Asm->OutStreamer->AddComment("Offset in Bucket " + Twine(i)); + MCContext &Context = Asm->OutStreamer->getContext(); + const MCExpr *Sub = MCBinaryExpr::createSub( + MCSymbolRefExpr::create((*HI)->Sym, Context), + MCSymbolRefExpr::create(SecBegin, Context), Context); + Asm->OutStreamer->EmitValue(Sub, sizeof(uint32_t)); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { + uint64_t PrevHash = UINT64_MAX; + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); + HI != HE; ++HI) { + // Terminate the previous entry if there is no hash collision + // with the current one. + if (PrevHash != UINT64_MAX && PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + // Remember to emit the label for our offset. + Asm->OutStreamer->EmitLabel((*HI)->Sym); + Asm->OutStreamer->AddComment((*HI)->Str); + Asm->emitDwarfStringOffset((*HI)->Data.Name); + Asm->OutStreamer->AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->Data.Values.size()); + for (HashDataContents *HD : (*HI)->Data.Values) { + // Emit the DIE offset + DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit()); + assert(CU && "Accelerated DIE should belong to a CU."); + Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset()); + // If we have multiple Atoms emit that info too. + // FIXME: A bit of a hack, we either emit only one atom or all info. + if (HeaderData.Atoms.size() > 1) { + Asm->EmitInt16(HD->Die->getTag()); + Asm->EmitInt8(HD->Flags); + } + } + PrevHash = (*HI)->HashValue; + } + // Emit the final end marker for the bucket. + if (!Buckets[i].empty()) + Asm->EmitInt32(0); + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::emit(AsmPrinter *Asm, const MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + emitOffsets(Asm, SecBegin); + + // Emit the hash data. 
+ EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap<DataArray>::const_iterator EI = Entries.begin(), + EE = Entries.end(); + EI != EE; ++EI) { + O << "Name: " << EI->getKeyData() << "\n"; + for (HashDataContents *HD : EI->second.Values) + HD->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0, e = Buckets.size(); i < e; ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); + HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector<HashData *>::const_iterator DI = Data.begin(), + DE = Data.end(); + DI != DE; ++DI) + (*DI)->print(O); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 0000000..4d81441 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,256 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFACCELTABLE_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include <vector> + +// The dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. +// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. 
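+
+// Editorial illustration (not part of the upstream file): a minimal,
+// self-contained sketch of the lookup sequence described above, assuming the
+// table has already been read back into plain vectors laid out as documented
+// (each bucket holds an index into the hash array, empty buckets hold
+// UINT32_MAX, and hash i is paired with offset i). The function name and
+// parameters are hypothetical.
+inline bool dwarfAccelTableLookupSketch(llvm::StringRef Name,
+                                        uint32_t BucketCount,
+                                        const std::vector<uint32_t> &Buckets,
+                                        const std::vector<uint32_t> &Hashes,
+                                        const std::vector<uint32_t> &Offsets,
+                                        uint32_t &DataOffset) {
+  // Same DJB hash the emitter uses (see DwarfAccelTable::HashDJB below).
+  uint32_t Hash = 5381;
+  for (char C : Name)
+    Hash = ((Hash << 5) + Hash) + C;
+
+  // The hash modulo the bucket count selects a bucket, which holds an index
+  // into the hashes array.
+  uint32_t Bucket = Hash % BucketCount;
+  uint32_t Idx = Buckets[Bucket];
+  if (Idx == UINT32_MAX)
+    return false;
+
+  // Scan successive hashes while they still map to this bucket; on a match,
+  // the parallel offsets entry points at the data for this name.
+  while (Idx < Hashes.size() && Hashes[Idx] % BucketCount == Bucket) {
+    if (Hashes[Idx] == Hash) {
+      DataOffset = Offsets[Idx];
+      return true;
+    }
+    ++Idx;
+  }
+  return false;
+}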
+ +namespace llvm { + +class AsmPrinter; +class DwarfDebug; + +class DwarfAccelTable { + + static uint32_t HashDJB(StringRef Str) { + uint32_t h = 5381; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount(void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader(uint32_t data_len) + : magic(MagicHash), version(1), + hash_function(dwarf::DW_hash_function_djb), bucket_count(0), + hashes_count(0), header_data_len(data_len) {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + + // Make these public so that they can be used as a general interface to + // the class. + struct Atom { + uint16_t type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form) + : type(type), form(form) {} +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << dwarf::AtomTypeString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +private: + struct TableHeaderData { + uint32_t die_offset_base; + SmallVector<Atom, 3> Atoms; + + TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0) + : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {} + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { print(dbgs()); } +#endif + }; + + // The data itself consists of a str_offset, a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. 
+ // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] +public: + struct HashDataContents { + const DIE *Die; // Offsets + char Flags; // Specific flags to output + + HashDataContents(const DIE *D, char Flags) : Die(D), Flags(Flags) {} +#ifndef NDEBUG + void print(raw_ostream &O) const { + O << " Offset: " << Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(Die->getTag()) << "\n"; + O << " Flags: " << Flags << "\n"; + } +#endif + }; + +private: + // String Data + struct DataArray { + DwarfStringPoolEntryRef Name; + std::vector<HashDataContents *> Values; + }; + friend struct HashData; + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + DwarfAccelTable::DataArray &Data; // offsets + HashData(StringRef S, DwarfAccelTable::DataArray &Data) + : Str(S), Data(Data) { + HashValue = DwarfAccelTable::HashDJB(S); + } +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: "; + if (Sym) + O << *Sym; + else + O << "<none>"; + O << "\n"; + for (HashDataContents *C : Data.Values) { + O << " Offset: " << C->Die->getOffset() << "\n"; + O << " Tag: " << dwarf::TagString(C->Die->getTag()) << "\n"; + O << " Flags: " << C->Flags << "\n"; + } + } + void dump() { print(dbgs()); } +#endif + }; + + DwarfAccelTable(const DwarfAccelTable &) = delete; + void operator=(const DwarfAccelTable &) = delete; + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void emitOffsets(AsmPrinter *, const MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Allocator for HashData and HashDataContents. + BumpPtrAllocator Allocator; + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector<HashData *> Data; + + typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector<HashData *> HashList; + typedef std::vector<HashList> BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation +public: + DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); + void AddName(DwarfStringPoolEntryRef Name, const DIE *Die, char Flags = 0); + void FinalizeTable(AsmPrinter *, StringRef); + void emit(AsmPrinter *, const MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { print(dbgs()); } +#endif +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp new file mode 100644 index 0000000..6665c16 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -0,0 +1,162 @@ +//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A) + : EHStreamer(A), shouldEmitCFI(false) {} + +void DwarfCFIExceptionBase::markFunctionEnd() { + if (shouldEmitCFI) + Asm->OutStreamer->EmitCFIEndProc(); + + if (MMI->getLandingPads().empty()) + return; + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); +} + +DwarfCFIException::DwarfCFIException(AsmPrinter *A) + : DwarfCFIExceptionBase(A), shouldEmitPersonality(false), + shouldEmitLSDA(false), shouldEmitMoves(false), + moveTypeModule(AsmPrinter::CFI_M_None) {} + +DwarfCFIException::~DwarfCFIException() {} + +/// endModule - Emit all exception information that should come after the +/// content. +void DwarfCFIException::endModule() { + if (moveTypeModule == AsmPrinter::CFI_M_Debug) + Asm->OutStreamer->EmitCFISections(false, true); + + // SjLj uses this pass and it doesn't need this info. + if (!Asm->MAI->usesCFIForEH()) + return; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + + if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect) + return; + + // Emit references to all used personality functions + for (const Function *Personality : MMI->getPersonalities()) { + if (!Personality) + continue; + MCSymbol *Sym = Asm->getSymbol(Personality); + TLOF.emitPersonalityValue(*Asm->OutStreamer, Asm->getDataLayout(), Sym); + } +} + +void DwarfCFIException::beginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + const Function *F = MF->getFunction(); + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + + // See if we need frame move info. + AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves(); + if (MoveType == AsmPrinter::CFI_M_EH || + (MoveType == AsmPrinter::CFI_M_Debug && + moveTypeModule == AsmPrinter::CFI_M_None)) + moveTypeModule = MoveType; + + shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None; + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + + // Emit a personality function even when there are no landing pads + bool forceEmitPersonality = + // ...if a personality function is explicitly specified + F->hasPersonalityFn() && + // ... 
and it's not known to be a noop in the absence of invokes + !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + // ... and we're not explicitly asked not to emit it + F->needsUnwindTableEntry(); + + shouldEmitPersonality = + (forceEmitPersonality || + (hasLandingPads && PerEncoding != dwarf::DW_EH_PE_omit)) && + Per; + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + shouldEmitCFI = shouldEmitPersonality || shouldEmitMoves; + if (!shouldEmitCFI) + return; + + Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false); + + // Indicate personality routine, if any. + if (!shouldEmitPersonality) + return; + + // If we are forced to emit this personality, make sure to record + // it because it might not appear in any landingpad + if (forceEmitPersonality) + MMI->addPersonality(Per); + + const MCSymbol *Sym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding); + + // Provide LSDA information. + if (!shouldEmitLSDA) + return; + + Asm->OutStreamer->EmitCFILsda(Asm->getCurExceptionSym(), LSDAEncoding); +} + +/// endFunction - Gather and emit post-function exception information. +/// +void DwarfCFIException::endFunction(const MachineFunction *) { + if (!shouldEmitPersonality) + return; + + emitExceptionTable(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 0000000..725063a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,830 @@ +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instruction.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +namespace llvm { + +DwarfCompileUnit::DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, + AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : DwarfUnit(UID, dwarf::DW_TAG_compile_unit, Node, A, DW, DWU), + Skeleton(nullptr), BaseAddress(nullptr) { + insertDIE(Node, &getUnitDie()); +} + +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void DwarfCompileUnit::addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + + // Don't use the address pool in non-fission or in the skeleton unit itself. + // FIXME: Once GDB supports this, it's probably worthwhile using the address + // pool from the skeleton - maybe even in non-fission (possibly fewer + // relocations by sharing them in the pool, but we have other ideas about how + // to reduce the number of relocations as well/instead). 
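// Editor's note (descriptive, not part of the patch): only the .dwo-side
// unit carries a non-null Skeleton pointer (see setSkeleton/isDwoUnit), so
// the test below routes both non-split builds and the skeleton CU itself to
// addLocalLabelAddress, which emits a plain DW_FORM_addr; only the full unit
// under fission indexes the label through the address pool as
// DW_FORM_GNU_addr_index.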
+ if (!DD->useSplitDwarf() || !Skeleton) + return addLocalLabelAddress(Die, Attribute, Label); + + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + unsigned idx = DD->getAddressPool().getIndex(Label); + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_GNU_addr_index, + DIEInteger(idx)); +} + +void DwarfCompileUnit::addLocalLabelAddress(DIE &Die, + dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + + if (Label) + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIELabel(Label)); + else + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_addr, + DIEInteger(0)); +} + +unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName, + StringRef DirName) { + // If we print assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + return Asm->OutStreamer->EmitDwarfFileDirective( + 0, DirName, FileName, + Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID()); +} + +// Return const expression if value is a GEP to access merged global +// constant. e.g. +// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) +static const ConstantExpr *getMergedGlobalExpr(const Value *V) { + const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V); + if (!CE || CE->getNumOperands() != 3 || + CE->getOpcode() != Instruction::GetElementPtr) + return nullptr; + + // First operand points to a global struct. + Value *Ptr = CE->getOperand(0); + if (!isa<GlobalValue>(Ptr) || + !isa<StructType>(cast<PointerType>(Ptr->getType())->getElementType())) + return nullptr; + + // Second operand is zero. + const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1)); + if (!CI || !CI->isZero()) + return nullptr; + + // Third operand is offset. + if (!isa<ConstantInt>(CE->getOperand(2))) + return nullptr; + + return CE; +} + +/// getOrCreateGlobalVariableDIE - get or create global variable DIE. +DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( + const DIGlobalVariable *GV) { + // Check for pre-existence. + if (DIE *Die = getDIE(GV)) + return Die; + + assert(GV); + + auto *GVContext = GV->getScope(); + auto *GTy = DD->resolve(GV->getType()); + + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + + // Add to map. + DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV); + DIScope *DeclContext; + if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) { + DeclContext = resolve(SDMDecl->getScope()); + assert(SDMDecl->isStaticMember() && "Expected static member decl"); + assert(GV->isDefinition()); + // We need the declaration DIE that is in the static member's class. + DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl); + addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE); + } else { + DeclContext = GV->getScope(); + // Add name and type. + addString(*VariableDIE, dwarf::DW_AT_name, GV->getDisplayName()); + addType(*VariableDIE, GTy); + + // Add scoping info. + if (!GV->isLocalToUnit()) + addFlag(*VariableDIE, dwarf::DW_AT_external); + + // Add line number info. 
+ addSourceLine(*VariableDIE, GV); + } + + if (!GV->isDefinition()) + addFlag(*VariableDIE, dwarf::DW_AT_declaration); + else + addGlobalName(GV->getName(), *VariableDIE, DeclContext); + + // Add location. + bool addToAccelTable = false; + if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) { + addToAccelTable = true; + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + const MCSymbol *Sym = Asm->getSymbol(Global); + if (Global->isThreadLocal()) { + if (Asm->TM.Options.EmulatedTLS) { + // TODO: add debug info for emulated thread local mode. + } else { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4 + ? dwarf::DW_OP_const4u + : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(*Loc, dwarf::DW_FORM_udata, + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym)); + } else { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(*Loc, dwarf::DW_FORM_udata, + DD->getAddressPool().getIndex(Sym, /* TLS */ true)); + } + // 3) followed by an OP to make the debugger do a TLS lookup. + addUInt(*Loc, dwarf::DW_FORM_data1, + DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address + : dwarf::DW_OP_form_tls_address); + } + } else { + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + } + + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + addLinkageName(*VariableDIE, GV->getLinkageName()); + } else if (const ConstantInt *CI = + dyn_cast_or_null<ConstantInt>(GV->getVariable())) { + addConstantValue(*VariableDIE, CI, GTy); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) { + addToAccelTable = true; + // GV is a merged global. + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + Value *Ptr = CE->getOperand(0); + MCSymbol *Sym = Asm->getSymbol(cast<GlobalValue>(Ptr)); + DD->addArangeLabel(SymbolCU(this, Sym)); + addOpAddress(*Loc, Sym); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(*Loc, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(*VariableDIE, dwarf::DW_AT_location, Loc); + } + + if (addToAccelTable) { + DD->addAccelName(GV->getName(), *VariableDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV->getLinkageName() != "" && GV->getName() != GV->getLinkageName()) + DD->addAccelName(GV->getLinkageName(), *VariableDIE); + } + + return VariableDIE; +} + +void DwarfCompileUnit::addRange(RangeSpan Range) { + bool SameAsPrevCU = this == DD->getPrevCU(); + DD->setPrevCU(this); + // If we have no current ranges just add the range and return, otherwise, + // check the current section and CU against the previous section and CU we + // emitted into and the subprogram was contained within. If these are the + // same then extend our current range, otherwise add this as a new range. 
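// Editor's note (illustrative only; the label names are hypothetical): if
// this CU's previous range ended at .Lfunc_end0 in .text and the incoming
// range is [.Lfunc_begin1, .Lfunc_end1) in that same section, the existing
// entry's end is simply moved to .Lfunc_end1. A range whose end lies in a
// different section, or one added right after some other CU emitted code,
// starts a new RangeSpan instead.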
+ if (CURanges.empty() || !SameAsPrevCU || + (&CURanges.back().getEnd()->getSection() != + &Range.getEnd()->getSection())) { + CURanges.push_back(Range); + return; + } + + CURanges.back().setEnd(Range.getEnd()); +} + +DIE::value_iterator +DwarfCompileUnit::addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, const MCSymbol *Sec) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + return addLabel(Die, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + Label); + return addSectionDelta(Die, Attribute, Label, Sec); +} + +void DwarfCompileUnit::initStmtList() { + // Define start line table label for each Compile Unit. + MCSymbol *LineTableStartSym = + Asm->OutStreamer->getDwarfLineTableSymbol(getUniqueID()); + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + StmtListValue = + addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym, + TLOF.getDwarfLineSection()->getBeginSymbol()); +} + +void DwarfCompileUnit::applyStmtList(DIE &D) { + D.addValue(DIEValueAllocator, *StmtListValue); +} + +void DwarfCompileUnit::attachLowHighPC(DIE &D, const MCSymbol *Begin, + const MCSymbol *End) { + assert(Begin && "Begin label should not be null!"); + assert(End && "End label should not be null!"); + assert(Begin->isDefined() && "Invalid starting label"); + assert(End->isDefined() && "Invalid end label"); + + addLabelAddress(D, dwarf::DW_AT_low_pc, Begin); + if (DD->getDwarfVersion() < 4) + addLabelAddress(D, dwarf::DW_AT_high_pc, End); + else + addLabelDelta(D, dwarf::DW_AT_high_pc, End, Begin); +} + +// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc +// and DW_AT_high_pc attributes. If there are global variables in this +// scope then create and insert DIEs for these variables. +DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { + DIE *SPDie = getOrCreateSubprogramDIE(SP, includeMinimalInlineScopes()); + + attachLowHighPC(*SPDie, Asm->getFunctionBegin(), Asm->getFunctionEnd()); + if (!DD->getCurrentFunction()->getTarget().Options.DisableFramePointerElim( + *DD->getCurrentFunction())) + addFlag(*SPDie, dwarf::DW_AT_APPLE_omit_frame_ptr); + + // Only include DW_AT_frame_base in full debug info + if (!includeMinimalInlineScopes()) { + const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo(); + MachineLocation Location(RI->getFrameRegister(*Asm->MF)); + if (RI->isPhysicalRegister(Location.getReg())) + addAddress(*SPDie, dwarf::DW_AT_frame_base, Location); + } + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + DD->addSubprogramNames(SP, *SPDie); + + return *SPDie; +} + +// Construct a DIE for this scope. 
+void DwarfCompileUnit::constructScopeDIE( + LexicalScope *Scope, SmallVectorImpl<DIE *> &FinalChildren) { + if (!Scope || !Scope->getScopeNode()) + return; + + auto *DS = Scope->getScopeNode(); + + assert((Scope->getInlinedAt() || !isa<DISubprogram>(DS)) && + "Only handle inlined subprograms here, use " + "constructSubprogramScopeDIE for non-inlined " + "subprograms"); + + SmallVector<DIE *, 8> Children; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. + DIE *ScopeDIE; + if (Scope->getParent() && isa<DISubprogram>(DS)) { + ScopeDIE = constructInlinedScopeDIE(Scope); + if (!ScopeDIE) + return; + // We create children when the scope DIE is not null. + createScopeChildrenDIE(Scope, Children); + } else { + // Early exit when we know the scope DIE is going to be null. + if (DD->isLexicalScopeDIENull(Scope)) + return; + + unsigned ChildScopeCount; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + createScopeChildrenDIE(Scope, Children, &ChildScopeCount); + + // Skip imported directives in gmlt-like data. + if (!includeMinimalInlineScopes()) { + // There is no need to emit empty lexical block DIE. + for (const auto *IE : ImportedEntities[DS]) + Children.push_back( + constructImportedEntityDIE(cast<DIImportedEntity>(IE))); + } + + // If there are only other scopes as children, put them directly in the + // parent instead, as this scope would serve no purpose. + if (Children.size() == ChildScopeCount) { + FinalChildren.insert(FinalChildren.end(), + std::make_move_iterator(Children.begin()), + std::make_move_iterator(Children.end())); + return; + } + ScopeDIE = constructLexicalScopeDIE(Scope); + assert(ScopeDIE && "Scope DIE should not be null."); + } + + // Add children + for (auto &I : Children) + ScopeDIE->addChild(std::move(I)); + + FinalChildren.push_back(std::move(ScopeDIE)); +} + +DIE::value_iterator +DwarfCompileUnit::addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + return Die.addValue(DIEValueAllocator, Attribute, + DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE, + SmallVector<RangeSpan, 2> Range) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + // Emit offset in .debug_range as a relocatable label. emitDIE will handle + // emitting it appropriately. + const MCSymbol *RangeSectionSym = + TLOF.getDwarfRangesSection()->getBeginSymbol(); + + RangeSpanList List(Asm->createTempSymbol("debug_ranges"), std::move(Range)); + + // Under fission, ranges are specified by constant offsets relative to the + // CU's DW_AT_GNU_ranges_base. + if (isDwoUnit()) + addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + else + addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(), + RangeSectionSym); + + // Add the range list to the set of ranges to be emitted. + (Skeleton ? 
Skeleton : this)->CURangeLists.push_back(std::move(List)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, SmallVector<RangeSpan, 2> Ranges) { + if (Ranges.size() == 1) { + const auto &single = Ranges.front(); + attachLowHighPC(Die, single.getStart(), single.getEnd()); + } else + addScopeRangeList(Die, std::move(Ranges)); +} + +void DwarfCompileUnit::attachRangesOrLowHighPC( + DIE &Die, const SmallVectorImpl<InsnRange> &Ranges) { + SmallVector<RangeSpan, 2> List; + List.reserve(Ranges.size()); + for (const InsnRange &R : Ranges) + List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first), + DD->getLabelAfterInsn(R.second))); + attachRangesOrLowHighPC(Die, std::move(List)); +} + +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. +DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) { + assert(Scope->getScopeNode()); + auto *DS = Scope->getScopeNode(); + auto *InlinedSP = getDISubprogram(DS); + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + DIE *OriginDIE = DU->getAbstractSPDies()[InlinedSP]; + assert(OriginDIE && "Unable to find original DIE for an inlined subprogram."); + + auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_inlined_subroutine); + addDIEEntry(*ScopeDIE, dwarf::DW_AT_abstract_origin, *OriginDIE); + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + // Add the call site information to the DIE. + const DILocation *IA = Scope->getInlinedAt(); + addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None, + getOrCreateSourceID(IA->getFilename(), IA->getDirectory())); + addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine()); + if (IA->getDiscriminator()) + addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None, + IA->getDiscriminator()); + + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_inlined_subprogram nodes. + DD->addSubprogramNames(InlinedSP, *ScopeDIE); + + return ScopeDIE; +} + +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. +DIE *DwarfCompileUnit::constructLexicalScopeDIE(LexicalScope *Scope) { + if (DD->isLexicalScopeDIENull(Scope)) + return nullptr; + + auto ScopeDIE = DIE::get(DIEValueAllocator, dwarf::DW_TAG_lexical_block); + if (Scope->isAbstractScope()) + return ScopeDIE; + + attachRangesOrLowHighPC(*ScopeDIE, Scope->getRanges()); + + return ScopeDIE; +} + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, bool Abstract) { + auto D = constructVariableDIEImpl(DV, Abstract); + DV.setDIE(*D); + return D; +} + +DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, + bool Abstract) { + // Define variable debug information entry. + auto VariableDie = DIE::get(DIEValueAllocator, DV.getTag()); + + if (Abstract) { + applyVariableAttributes(DV, *VariableDie); + return VariableDie; + } + + // Add variable address. + + unsigned Offset = DV.getDebugLocListIndex(); + if (Offset != ~0U) { + addLocationList(*VariableDie, dwarf::DW_AT_location, Offset); + return VariableDie; + } + + // Check if variable is described by a DBG_VALUE instruction. 
+ if (const MachineInstr *DVInsn = DV.getMInsn()) { + assert(DVInsn->getNumOperands() == 4); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. + if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, *VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(DV, *VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(*VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(), + DV.getType()); + + return VariableDie; + } + + // .. else use frame index. + if (DV.getFrameIndex().empty()) + return VariableDie; + + auto Expr = DV.getExpression().begin(); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + for (auto FI : DV.getFrameIndex()) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + assert(Expr != DV.getExpression().end() && "Wrong number of expressions"); + DwarfExpr.AddMachineRegIndirect(FrameReg, Offset); + DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end()); + ++Expr; + } + addBlock(*VariableDie, dwarf::DW_AT_location, Loc); + + return VariableDie; +} + +DIE *DwarfCompileUnit::constructVariableDIE(DbgVariable &DV, + const LexicalScope &Scope, + DIE *&ObjectPointer) { + auto Var = constructVariableDIE(DV, Scope.isAbstractScope()); + if (DV.isObjectPointer()) + ObjectPointer = Var; + return Var; +} + +DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children, + unsigned *ChildScopeCount) { + DIE *ObjectPointer = nullptr; + + for (DbgVariable *DV : DU->getScopeVariables().lookup(Scope)) + Children.push_back(constructVariableDIE(*DV, *Scope, ObjectPointer)); + + unsigned ChildCountWithoutScopes = Children.size(); + + for (LexicalScope *LS : Scope->getChildren()) + constructScopeDIE(LS, Children); + + if (ChildScopeCount) + *ChildScopeCount = Children.size() - ChildCountWithoutScopes; + + return ObjectPointer; +} + +void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(!Scope->getInlinedAt()); + assert(!Scope->isAbstractScope()); + auto *Sub = cast<DISubprogram>(Scope->getScopeNode()); + + DD->getProcessedSPNodes().insert(Sub); + + DIE &ScopeDIE = updateSubprogramScopeDIE(Sub); + + // If this is a variadic function, add an unspecified parameter. + DITypeRefArray FnArgs = Sub->getType()->getTypeArray(); + + // Collect lexical scope children first. + // ObjectPointer might be a local (non-argument) local variable if it's a + // block's synthetic this pointer. + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE)) + addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer); + + // If we have a single element of null, it is a function that returns void. + // If we have more than one elements and the last one is null, it is a + // variadic function. 
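// Editor's note (illustrative example, not part of the patch): for a
// declaration like `void f(void)` the type array is just { null } and no
// child is added, while for `int printf(const char *, ...)` it is
// { int, char*, null }, so the trailing null triggers the
// DW_TAG_unspecified_parameters child below (unless we are emitting
// gmlt-style minimal scopes).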
+ if (FnArgs.size() > 1 && !FnArgs[FnArgs.size() - 1] && + !includeMinimalInlineScopes()) + ScopeDIE.addChild( + DIE::get(DIEValueAllocator, dwarf::DW_TAG_unspecified_parameters)); +} + +DIE *DwarfCompileUnit::createAndAddScopeChildren(LexicalScope *Scope, + DIE &ScopeDIE) { + // We create children when the scope DIE is not null. + SmallVector<DIE *, 8> Children; + DIE *ObjectPointer = createScopeChildrenDIE(Scope, Children); + + // Add children + for (auto &I : Children) + ScopeDIE.addChild(std::move(I)); + + return ObjectPointer; +} + +void DwarfCompileUnit::constructAbstractSubprogramScopeDIE( + LexicalScope *Scope) { + DIE *&AbsDef = DU->getAbstractSPDies()[Scope->getScopeNode()]; + if (AbsDef) + return; + + auto *SP = cast<DISubprogram>(Scope->getScopeNode()); + + DIE *ContextDIE; + + if (includeMinimalInlineScopes()) + ContextDIE = &getUnitDie(); + // Some of this is duplicated from DwarfUnit::getOrCreateSubprogramDIE, with + // the important distinction that the debug node is not associated with the + // DIE (since the debug node will be associated with the concrete DIE, if + // any). It could be refactored to some common utility function. + else if (auto *SPDecl = SP->getDeclaration()) { + ContextDIE = &getUnitDie(); + getOrCreateSubprogramDIE(SPDecl); + } else + ContextDIE = getOrCreateContextDIE(resolve(SP->getScope())); + + // Passing null as the associated node because the abstract definition + // shouldn't be found by lookup. + AbsDef = &createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, nullptr); + applySubprogramAttributesToDefinition(SP, *AbsDef); + + if (!includeMinimalInlineScopes()) + addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); + if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, *AbsDef)) + addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer); +} + +DIE *DwarfCompileUnit::constructImportedEntityDIE( + const DIImportedEntity *Module) { + DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag()); + insertDIE(Module, IMDie); + DIE *EntityDie; + auto *Entity = resolve(Module->getEntity()); + if (auto *NS = dyn_cast<DINamespace>(Entity)) + EntityDie = getOrCreateNameSpace(NS); + else if (auto *M = dyn_cast<DIModule>(Entity)) + EntityDie = getOrCreateModule(M); + else if (auto *SP = dyn_cast<DISubprogram>(Entity)) + EntityDie = getOrCreateSubprogramDIE(SP); + else if (auto *T = dyn_cast<DIType>(Entity)) + EntityDie = getOrCreateTypeDIE(T); + else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity)) + EntityDie = getOrCreateGlobalVariableDIE(GV); + else + EntityDie = getDIE(Entity); + assert(EntityDie); + addSourceLine(*IMDie, Module->getLine(), Module->getScope()->getFilename(), + Module->getScope()->getDirectory()); + addDIEEntry(*IMDie, dwarf::DW_AT_import, *EntityDie); + StringRef Name = Module->getName(); + if (!Name.empty()) + addString(*IMDie, dwarf::DW_AT_name, Name); + + return IMDie; +} + +void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) { + DIE *D = getDIE(SP); + if (DIE *AbsSPDIE = DU->getAbstractSPDies().lookup(SP)) { + if (D) + // If this subprogram has an abstract definition, reference that + addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE); + } else { + if (!D && !includeMinimalInlineScopes()) + // Lazily construct the subprogram if we didn't see either concrete or + // inlined versions during codegen. 
(except in -gmlt ^ where we want + // to omit these entirely) + D = getOrCreateSubprogramDIE(SP); + if (D) + // And attach the attributes + applySubprogramAttributesToDefinition(SP, *D); + } +} +void DwarfCompileUnit::collectDeadVariables(const DISubprogram *SP) { + assert(SP && "CU's subprogram list contains a non-subprogram"); + assert(SP->isDefinition() && + "CU's subprogram list contains a subprogram declaration"); + auto Variables = SP->getVariables(); + if (Variables.size() == 0) + return; + + DIE *SPDIE = DU->getAbstractSPDies().lookup(SP); + if (!SPDIE) + SPDIE = getDIE(SP); + assert(SPDIE); + for (const DILocalVariable *DV : Variables) { + DbgVariable NewVar(DV, /* IA */ nullptr, DD); + auto VariableDie = constructVariableDIE(NewVar); + applyVariableAttributes(NewVar, *VariableDie); + SPDIE->addChild(std::move(VariableDie)); + } +} + +void DwarfCompileUnit::emitHeader(bool UseOffsets) { + // Don't bother labeling the .dwo unit, as its offset isn't used. + if (!Skeleton) { + LabelBegin = Asm->createTempSymbol("cu_begin"); + Asm->OutStreamer->EmitLabel(LabelBegin); + } + + DwarfUnit::emitHeader(UseOffsets); +} + +/// addGlobalName - Add a new global name to the compile unit. +void DwarfCompileUnit::addGlobalName(StringRef Name, DIE &Die, + const DIScope *Context) { + if (includeMinimalInlineScopes()) + return; + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = &Die; +} + +/// Add a new global type to the unit. +void DwarfCompileUnit::addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) { + if (includeMinimalInlineScopes()) + return; + std::string FullName = getParentContextString(Context) + Ty->getName().str(); + GlobalTypes[FullName] = &Die; +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location) { + if (DV.hasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV.isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// Add an address attribute to a die based on the location provided. +void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + + bool validReg; + if (Location.isReg()) + validReg = addRegisterOpPiece(*Loc, Location.getReg()); + else + validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + if (!validReg) + return; + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, Loc); +} + +/// Start with the address based on the location provided, and generate the +/// DWARF information necessary to find the actual variable given the extra +/// address information encoded in the DbgVariable, starting from the starting +/// location. Add the DWARF information to the die. 
+void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + assert(DV.getExpression().size() == 1); + const DIExpression *Expr = DV.getExpression().back(); + bool ValidReg; + if (Location.getOffset()) { + ValidReg = DwarfExpr.AddMachineRegIndirect(Location.getReg(), + Location.getOffset()); + if (ValidReg) + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end()); + } else + ValidReg = DwarfExpr.AddMachineRegExpression(Expr, Location.getReg()); + + // Now attach the location information to the DIE. + if (ValidReg) + addBlock(Die, Attribute, Loc); +} + +/// Add a Dwarf loclistptr attribute data and value. +void DwarfCompileUnit::addLocationList(DIE &Die, dwarf::Attribute Attribute, + unsigned Index) { + dwarf::Form Form = DD->getDwarfVersion() >= 4 ? dwarf::DW_FORM_sec_offset + : dwarf::DW_FORM_data4; + Die.addValue(DIEValueAllocator, Attribute, Form, DIELocList(Index)); +} + +void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var, + DIE &VariableDie) { + StringRef Name = Var.getName(); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, Var.getVariable()); + addType(VariableDie, Var.getType()); + if (Var.isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); +} + +/// Add a Dwarf expression attribute data and value. +void DwarfCompileUnit::addExpr(DIELoc &Die, dwarf::Form Form, + const MCExpr *Expr) { + Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, Form, DIEExpr(Expr)); +} + +void DwarfCompileUnit::applySubprogramAttributesToDefinition( + const DISubprogram *SP, DIE &SPDie) { + auto *SPDecl = SP->getDeclaration(); + auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope()); + applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes()); + addGlobalName(SP->getName(), SPDie, Context); +} + +bool DwarfCompileUnit::isDwoUnit() const { + return DD->useSplitDwarf() && Skeleton; +} + +bool DwarfCompileUnit::includeMinimalInlineScopes() const { + return getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly || + (DD->useSplitDwarf() && !Skeleton); +} +} // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h new file mode 100644 index 0000000..2e28467 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -0,0 +1,243 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DwarfUnit.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Dwarf.h" + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; +class DwarfFile; +class MCSymbol; +class LexicalScope; + +class DwarfCompileUnit : public DwarfUnit { + /// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding + /// the need to search for it in applyStmtList. + DIE::value_iterator StmtListValue; + + /// Skeleton unit associated with this unit. + DwarfCompileUnit *Skeleton; + + /// The start of the unit within its section. + MCSymbol *LabelBegin; + + typedef llvm::SmallVector<const MDNode *, 8> ImportedEntityList; + typedef llvm::DenseMap<const MDNode *, ImportedEntityList> + ImportedEntityMap; + + ImportedEntityMap ImportedEntities; + + /// GlobalNames - A map of globally visible named entities for this unit. + StringMap<const DIE *> GlobalNames; + + /// GlobalTypes - A map of globally visible types for this unit. + StringMap<const DIE *> GlobalTypes; + + // List of range lists for a given compile unit, separate from the ranges for + // the CU itself. + SmallVector<RangeSpanList, 1> CURangeLists; + + // List of ranges for a given compile unit. + SmallVector<RangeSpan, 2> CURanges; + + // The base address of this unit, if any. Used for relative references in + // ranges/locs. + const MCSymbol *BaseAddress; + + /// \brief Construct a DIE for the given DbgVariable without initializing the + /// DbgVariable's DIE reference. + DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract); + + bool isDwoUnit() const override; + + bool includeMinimalInlineScopes() const; + +public: + DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + DwarfCompileUnit *getSkeleton() const { + return Skeleton; + } + + void initStmtList(); + + /// Apply the DW_AT_stmt_list from this compile unit to the specified DIE. + void applyStmtList(DIE &D); + + /// getOrCreateGlobalVariableDIE - get or create global variable DIE. + DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV); + + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + void addLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addLocalLabelAddress - Add a dwarf label attribute data and value using + /// DW_FORM_addr only. + void addLocalLabelAddress(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label); + + /// addSectionDelta - Add a label delta attribute data and value. + DIE::value_iterator addSectionDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo); + + DwarfCompileUnit &getCU() override { return *this; } + + unsigned getOrCreateSourceID(StringRef FileName, StringRef DirName) override; + + void addImportedEntity(const DIImportedEntity* IE) { + ImportedEntities[IE->getScope()].push_back(IE); + } + + /// addRange - Add an address range to the list of ranges for this unit. + void addRange(RangeSpan Range); + + void attachLowHighPC(DIE &D, const MCSymbol *Begin, const MCSymbol *End); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. 
+ /// + DIE::value_iterator addSectionLabel(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Label, + const MCSymbol *Sec); + + /// \brief Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. + DIE &updateSubprogramScopeDIE(const DISubprogram *SP); + + void constructScopeDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &FinalChildren); + + /// \brief A helper function to construct a RangeSpanList for a given + /// lexical scope. + void addScopeRangeList(DIE &ScopeDIE, SmallVector<RangeSpan, 2> Range); + + void attachRangesOrLowHighPC(DIE &D, SmallVector<RangeSpan, 2> Ranges); + + void attachRangesOrLowHighPC(DIE &D, + const SmallVectorImpl<InsnRange> &Ranges); + /// \brief This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. + DIE *constructInlinedScopeDIE(LexicalScope *Scope); + + /// \brief Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. + DIE *constructLexicalScopeDIE(LexicalScope *Scope); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool Abstract = false); + + DIE *constructVariableDIE(DbgVariable &DV, const LexicalScope &Scope, + DIE *&ObjectPointer); + + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(LexicalScope *Scope, + SmallVectorImpl<DIE *> &Children, + unsigned *ChildScopeCount = nullptr); + + /// \brief Construct a DIE for this subprogram scope. + void constructSubprogramScopeDIE(LexicalScope *Scope); + + DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE); + + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + + /// \brief Construct import_module DIE. + DIE *constructImportedEntityDIE(const DIImportedEntity *Module); + + void finishSubprogramDefinition(const DISubprogram *SP); + + void collectDeadVariables(const DISubprogram *SP); + + /// Set the skeleton unit associated with this unit. + void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; } + + const MCSymbol *getSectionSym() const { + assert(Section); + return Section->getBeginSymbol(); + } + + unsigned getLength() { + return sizeof(uint32_t) + // Length field + getHeaderSize() + UnitDie.getSize(); + } + + void emitHeader(bool UseOffsets) override; + + MCSymbol *getLabelBegin() const { + assert(Section); + return LabelBegin; + } + + /// Add a new global name to the compile unit. + void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) override; + + /// Add a new global type to the compile unit. + void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) override; + + const StringMap<const DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<const DIE *> &getGlobalTypes() const { return GlobalTypes; } + + /// Add DW_AT_location attribute for a DbgVariable based on provided + /// MachineLocation. + void addVariableAddress(const DbgVariable &DV, DIE &Die, + MachineLocation Location); + /// Add an address attribute to a die based on the location provided. 
+ void addAddress(DIE &Die, dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual variable (navigating the + /// extra location information encoded in the type) based on the starting + /// location. Add the DWARF information to the die. + void addComplexAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Add a Dwarf loclistptr attribute data and value. + void addLocationList(DIE &Die, dwarf::Attribute Attribute, unsigned Index); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); + + /// Add a Dwarf expression attribute data and value. + void addExpr(DIELoc &Die, dwarf::Form Form, const MCExpr *Expr); + + void applySubprogramAttributesToDefinition(const DISubprogram *SP, + DIE &SPDie); + + /// getRangeLists - Get the vector of range lists. + const SmallVectorImpl<RangeSpanList> &getRangeLists() const { + return (Skeleton ? Skeleton : this)->CURangeLists; + } + + /// getRanges - Get the list of ranges for this unit. + const SmallVectorImpl<RangeSpan> &getRanges() const { return CURanges; } + SmallVector<RangeSpan, 2> takeRanges() { return std::move(CURanges); } + + void setBaseAddress(const MCSymbol *Base) { BaseAddress = Base; } + const MCSymbol *getBaseAddress() const { return BaseAddress; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp new file mode 100644 index 0000000..3466f34 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -0,0 +1,2018 @@ +//===-- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfDebug.h" +#include "ByteStreamer.h" +#include "DIEHash.h" +#include "DebugLocEntry.h" +#include "DwarfCompileUnit.h" +#include "DwarfExpression.h" +#include "DwarfUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); + +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); + +static cl::opt<bool> +GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate GNU-style pubnames and pubtypes"), + cl::init(false)); + +static cl::opt<bool> GenerateARangeSection("generate-arange-section", + cl::Hidden, + cl::desc("Generate dwarf aranges"), + cl::init(false)); + +namespace { +enum DefaultOnOff { Default, Enable, Disable }; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output DWARF5 split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate DWARF pubnames and pubtypes sections"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DwarfLinkageNames("dwarf-linkage-names", cl::Hidden, + cl::desc("Emit DWARF linkage-name attributes."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static const char *const DWARFGroupName = 
"DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; + +void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) { + BS.EmitInt8( + Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op) + : dwarf::OperationEncodingString(Op)); +} + +void DebugLocDwarfExpression::EmitSigned(int64_t Value) { + BS.EmitSLEB128(Value, Twine(Value)); +} + +void DebugLocDwarfExpression::EmitUnsigned(uint64_t Value) { + BS.EmitULEB128(Value, Twine(Value)); +} + +bool DebugLocDwarfExpression::isFrameRegister(unsigned MachineReg) { + // This information is not available while emitting .debug_loc entries. + return false; +} + +//===----------------------------------------------------------------------===// + +/// resolve - Look in the DwarfDebug map for the MDNode that +/// corresponds to the reference. +template <typename T> T *DbgVariable::resolve(TypedDINodeRef<T> Ref) const { + return DD->resolve(Ref); +} + +bool DbgVariable::isBlockByrefVariable() const { + assert(Var && "Invalid complex DbgVariable!"); + return Var->getType() + .resolve(DD->getTypeIdentifierMap()) + ->isBlockByrefStruct(); +} + +const DIType *DbgVariable::getType() const { + DIType *Ty = Var->getType().resolve(DD->getTypeIdentifierMap()); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Ty->isBlockByrefStruct()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. 
*/ + DIType *subType = Ty; + uint16_t tag = Ty->getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) + subType = resolve(cast<DIDerivedType>(Ty)->getBaseType()); + + auto Elements = cast<DICompositeType>(subType)->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *DT = cast<DIDerivedType>(Elements[i]); + if (getName() == DT->getName()) + return resolve(DT->getBaseType()); + } + } + return Ty; +} + +static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = { + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4), + DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2), + DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)}; + +DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) + : Asm(A), MMI(Asm->MMI), DebugLocs(A->OutStreamer->isVerboseAsm()), + PrevLabel(nullptr), InfoHolder(A, "info_string", DIEValueAllocator), + SkeletonHolder(A, "skel_string", DIEValueAllocator), + IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()), + AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelNamespace(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, + dwarf::DW_FORM_data4)), + AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) { + + CurFn = nullptr; + CurMI = nullptr; + Triple TT(Asm->getTargetTriple()); + + // Make sure we know our "debugger tuning." The target option takes + // precedence; fall back to triple-based defaults. + if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default) + DebuggerTuning = Asm->TM.Options.DebuggerTuning; + else if (IsDarwin || TT.isOSFreeBSD()) + DebuggerTuning = DebuggerKind::LLDB; + else if (TT.isPS4CPU()) + DebuggerTuning = DebuggerKind::SCE; + else + DebuggerTuning = DebuggerKind::GDB; + + // Turn on accelerator tables for LLDB by default. + if (DwarfAccelTables == Default) + HasDwarfAccelTables = tuneForLLDB(); + else + HasDwarfAccelTables = DwarfAccelTables == Enable; + + // Handle split DWARF. Off by default for now. + if (SplitDwarf == Default) + HasSplitDwarf = false; + else + HasSplitDwarf = SplitDwarf == Enable; + + // Pubnames/pubtypes on by default for GDB. + if (DwarfPubSections == Default) + HasDwarfPubSections = tuneForGDB(); + else + HasDwarfPubSections = DwarfPubSections == Enable; + + // SCE does not use linkage names. + if (DwarfLinkageNames == Default) + UseLinkageNames = !tuneForSCE(); + else + UseLinkageNames = DwarfLinkageNames == Enable; + + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; + DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber + : MMI->getModule()->getDwarfVersion(); + // Use dwarf 4 by default if nothing is requested. + DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION; + + // Work around a GDB bug. GDB doesn't support the standard opcode; + // SCE doesn't support GNU's; LLDB prefers the standard opcode, which + // is defined as of DWARF 3. + // See GDB bug 11616 - DW_OP_form_tls_address is unimplemented + // https://sourceware.org/bugzilla/show_bug.cgi?id=11616 + UseGNUTLSOpcode = tuneForGDB() || DwarfVersion < 3; + + Asm->OutStreamer->getContext().setDwarfVersion(DwarfVersion); + + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + beginModule(); + } +} + +// Define out of line so we don't have to include DwarfUnit.h in DwarfDebug.h. 
+DwarfDebug::~DwarfDebug() { } + +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) + return false; + + return Name.find(") ") != StringRef::npos; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. +// TODO: Determine whether or not we should add names for programs +// that do not have a DW_AT_name or DW_AT_linkage_name field - this +// is only slightly different than the lookup of non-standard ObjC names. +void DwarfDebug::addSubprogramNames(const DISubprogram *SP, DIE &Die) { + if (!SP->isDefinition()) + return; + addAccelName(SP->getName(), Die); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP->getLinkageName() != "" && SP->getName() != SP->getLinkageName()) + addAccelName(SP->getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP->getName())) { + StringRef Class, Category; + getObjCClassCategory(SP->getName(), Class, Category); + addAccelObjC(Class, Die); + if (Category != "") + addAccelObjC(Category, Die); + // Also add the base method name to the name table. + addAccelName(getObjCMethodName(SP->getName()), Die); + } +} + +/// Check whether we should create a DIE for the given Scope, return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl<InsnRange> &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + return !getLabelAfterInsn(Ranges.front().second); +} + +template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) { + F(CU); + if (auto *SkelCU = CU.getSkeleton()) + F(*SkelCU); +} + +void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) { + assert(Scope && Scope->getScopeNode()); + assert(Scope->isAbstractScope()); + assert(!Scope->getInlinedAt()); + + const MDNode *SP = Scope->getScopeNode(); + + ProcessedSPNodes.insert(SP); + + // Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram + // was inlined from another compile unit. + auto &CU = SPMap[SP]; + forBothCUs(*CU, [&](DwarfCompileUnit &CU) { + CU.constructAbstractSubprogramScopeDIE(Scope); + }); +} + +void DwarfDebug::addGnuPubAttributes(DwarfUnit &U, DIE &D) const { + if (!GenerateGnuPubSections) + return; + + U.addFlag(D, dwarf::DW_AT_GNU_pubnames); +} + +// Create new DwarfCompileUnit for the given metadata node with tag +// DW_TAG_compile_unit. 
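+// The resulting DW_TAG_compile_unit DIE carries at least DW_AT_producer,
+// DW_AT_language and DW_AT_name; DW_AT_stmt_list and DW_AT_comp_dir are only
+// added in the non-split-DWARF case, since split DWARF keeps them in the
+// skeleton CU (see the code below).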
+DwarfCompileUnit & +DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) { + StringRef FN = DIUnit->getFilename(); + CompilationDir = DIUnit->getDirectory(); + + auto OwnedUnit = make_unique<DwarfCompileUnit>( + InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + DIE &Die = NewCU.getUnitDie(); + InfoHolder.addUnit(std::move(OwnedUnit)); + if (useSplitDwarf()) + NewCU.setSkeleton(constructSkeletonCU(NewCU)); + + // LTO with assembly output shares a single line table amongst multiple CUs. + // To avoid the compilation directory being ambiguous, let the line table + // explicitly describe the directory of all files, never relying on the + // compilation directory. + if (!Asm->OutStreamer->hasRawTextSupport() || SingleCU) + Asm->OutStreamer->getContext().setMCLineTableCompilationDir( + NewCU.getUniqueID(), CompilationDir); + + NewCU.addString(Die, dwarf::DW_AT_producer, DIUnit->getProducer()); + NewCU.addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit->getSourceLanguage()); + NewCU.addString(Die, dwarf::DW_AT_name, FN); + + if (!useSplitDwarf()) { + NewCU.initStmtList(); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU.addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + addGnuPubAttributes(NewCU, Die); + } + + if (DIUnit->isOptimized()) + NewCU.addFlag(Die, dwarf::DW_AT_APPLE_optimized); + + StringRef Flags = DIUnit->getFlags(); + if (!Flags.empty()) + NewCU.addString(Die, dwarf::DW_AT_APPLE_flags, Flags); + + if (unsigned RVer = DIUnit->getRuntimeVersion()) + NewCU.addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + dwarf::DW_FORM_data1, RVer); + + if (useSplitDwarf()) + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection()); + else + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + + if (DIUnit->getDWOId()) { + // This CU is either a clang module DWO or a skeleton CU. + NewCU.addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, + DIUnit->getDWOId()); + if (!DIUnit->getSplitDebugFilename().empty()) + // This is a prefabricated skeleton CU. + NewCU.addString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit->getSplitDebugFilename()); + } + + CUMap.insert(std::make_pair(DIUnit, &NewCU)); + CUDieMap.insert(std::make_pair(&Die, &NewCU)); + return NewCU; +} + +void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU, + const DIImportedEntity *N) { + if (DIE *D = TheCU.getOrCreateContextDIE(N->getScope())) + D->addChild(TheCU.constructImportedEntityDIE(N)); +} + +// Emit all Dwarf sections that should come prior to the content. Create +// global DIEs and emit initial debug info sections. This is invoked by +// the target AsmPrinter. 
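+// Concretely, this walks the llvm.dbg.cu named metadata, builds one
+// DwarfCompileUnit per DICompileUnit, and pre-creates DIEs for global
+// variables, enum and retained types, and imported entities, while recording
+// each subprogram in SPMap for later per-function processing.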
+void DwarfDebug::beginModule() { + if (DisableDebugInfoPrinting) + return; + + const Module *M = MMI->getModule(); + + NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) + return; + TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes); + + SingleCU = CU_Nodes->getNumOperands() == 1; + + for (MDNode *N : CU_Nodes->operands()) { + auto *CUNode = cast<DICompileUnit>(N); + DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode); + for (auto *IE : CUNode->getImportedEntities()) + CU.addImportedEntity(IE); + for (auto *GV : CUNode->getGlobalVariables()) + CU.getOrCreateGlobalVariableDIE(GV); + for (auto *SP : CUNode->getSubprograms()) + SPMap.insert(std::make_pair(SP, &CU)); + for (auto *Ty : CUNode->getEnumTypes()) { + // The enum types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + CU.getOrCreateTypeDIE(cast<DIType>(resolve(Ty->getRef()))); + } + for (auto *Ty : CUNode->getRetainedTypes()) { + // The retained types array by design contains pointers to + // MDNodes rather than DIRefs. Unique them here. + DIType *RT = cast<DIType>(resolve(Ty->getRef())); + if (!RT->isExternalTypeRef()) + // There is no point in force-emitting a forward declaration. + CU.getOrCreateTypeDIE(RT); + } + // Emit imported_modules last so that the relevant context is already + // available. + for (auto *IE : CUNode->getImportedEntities()) + constructAndAddImportedEntityDIE(CU, IE); + } + + // Tell MMI that we have debug info. + MMI->setDebugInfoAvailability(true); +} + +void DwarfDebug::finishVariableDefinitions() { + for (const auto &Var : ConcreteVariables) { + DIE *VariableDie = Var->getDIE(); + assert(VariableDie); + // FIXME: Consider the time-space tradeoff of just storing the unit pointer + // in the ConcreteVariables list, rather than looking it up again here. + // DIE::getUnit isn't simple - it walks parent pointers, etc. + DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); + assert(Unit); + DbgVariable *AbsVar = getExistingAbstractVariable( + InlinedVariable(Var->getVariable(), Var->getInlinedAt())); + if (AbsVar && AbsVar->getDIE()) { + Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, + *AbsVar->getDIE()); + } else + Unit->applyVariableAttributes(*Var, *VariableDie); + } +} + +void DwarfDebug::finishSubprogramDefinitions() { + for (const auto &P : SPMap) + forBothCUs(*P.second, [&](DwarfCompileUnit &CU) { + CU.finishSubprogramDefinition(cast<DISubprogram>(P.first)); + }); +} + + +// Collect info for variables that were optimized out. +void DwarfDebug::collectDeadVariables() { + const Module *M = MMI->getModule(); + + if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { + for (MDNode *N : CU_Nodes->operands()) { + auto *TheCU = cast<DICompileUnit>(N); + // Construct subprogram DIE and add variables DIEs. + DwarfCompileUnit *SPCU = + static_cast<DwarfCompileUnit *>(CUMap.lookup(TheCU)); + assert(SPCU && "Unable to find Compile Unit!"); + for (auto *SP : TheCU->getSubprograms()) { + if (ProcessedSPNodes.count(SP) != 0) + continue; + SPCU->collectDeadVariables(SP); + } + } + } +} + +void DwarfDebug::finalizeModuleInfo() { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + + finishSubprogramDefinitions(); + + finishVariableDefinitions(); + + // Collect info for variables that were optimized out. + collectDeadVariables(); + + // Handle anything that needs to be done on a per-unit basis after + // all other generation. 
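+  // In particular: connect types to their vtable-holding type, attach the
+  // split-DWARF dwo_id (plus addr/ranges base labels) to both the full and
+  // the skeleton unit, and choose between DW_AT_ranges and a plain
+  // DW_AT_low_pc for the unit's code ranges.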
+ for (const auto &P : CUMap) { + auto &TheCU = *P.second; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. + TheCU.constructContainingTypeDIEs(); + + // Add CU specific attributes if we need to add any. + // If we're splitting the dwarf out now that we've got the entire + // CU then add the dwo id to it. + auto *SkCU = TheCU.getSkeleton(); + if (useSplitDwarf()) { + // Emit a unique identifier for this CU. + uint64_t ID = DIEHash(Asm).computeCUSignature(TheCU.getUnitDie()); + TheCU.addUInt(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + SkCU->addUInt(SkCU->getUnitDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + + // We don't keep track of which addresses are used in which CU so this + // is a bit pessimistic under LTO. + if (!AddrPool.isEmpty()) { + const MCSymbol *Sym = TLOF.getDwarfAddrSection()->getBeginSymbol(); + SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_addr_base, + Sym, Sym); + } + if (!SkCU->getRangeLists().empty()) { + const MCSymbol *Sym = TLOF.getDwarfRangesSection()->getBeginSymbol(); + SkCU->addSectionLabel(SkCU->getUnitDie(), dwarf::DW_AT_GNU_ranges_base, + Sym, Sym); + } + } + + // If we have code split among multiple sections or non-contiguous + // ranges of code then emit a DW_AT_ranges attribute on the unit that will + // remain in the .o file, otherwise add a DW_AT_low_pc. + // FIXME: We should use ranges allow reordering of code ala + // .subsections_via_symbols in mach-o. This would mean turning on + // ranges for all subprogram DIEs for mach-o. + DwarfCompileUnit &U = SkCU ? *SkCU : TheCU; + if (unsigned NumRanges = TheCU.getRanges().size()) { + if (NumRanges > 1) + // A DW_AT_low_pc attribute may also be specified in combination with + // DW_AT_ranges to specify the default base address for use in + // location lists (see Section 2.6.2) and range lists (see Section + // 2.17.3). + U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + else + U.setBaseAddress(TheCU.getRanges().front().getStart()); + U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges()); + } + } + + // Compute DIE offsets and sizes. + InfoHolder.computeSizeAndOffsets(); + if (useSplitDwarf()) + SkeletonHolder.computeSizeAndOffsets(); +} + +// Emit all Dwarf sections that should come after the content. +void DwarfDebug::endModule() { + assert(CurFn == nullptr); + assert(CurMI == nullptr); + + // If we aren't actually generating debug info (check beginModule - + // conditionalized on !DisableDebugInfoPrinting and the presence of the + // llvm.dbg.cu metadata node) + if (!MMI->hasDebugInfo()) + return; + + // Finalize the debug info for the module. + finalizeModuleInfo(); + + emitDebugStr(); + + if (useSplitDwarf()) + emitDebugLocDWO(); + else + // Emit info into a debug loc section. + emitDebugLoc(); + + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); + + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + + // Emit info into a debug aranges section. + if (GenerateARangeSection) + emitDebugARanges(); + + // Emit info into a debug ranges section. + emitDebugRanges(); + + if (useSplitDwarf()) { + emitDebugStrDWO(); + emitDebugInfoDWO(); + emitDebugAbbrevDWO(); + emitDebugLineDWO(); + // Emit DWO addresses. + AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); + } + + // Emit info into the dwarf accelerator table sections. 
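+  // (These correspond to the .apple_names, .apple_objc, .apple_namespaces
+  // and .apple_types accelerator sections, consumed primarily by LLDB.)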
+ if (useDwarfAccelTables()) { + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + } + + // Emit the pubnames and pubtypes sections if requested. + if (HasDwarfPubSections) { + emitDebugPubNames(GenerateGnuPubSections); + emitDebugPubTypes(GenerateGnuPubSections); + } + + // clean up. + SPMap.clear(); + AbstractVariables.clear(); +} + +// Find abstract variable, if any, associated with Var. +DbgVariable * +DwarfDebug::getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed) { + // More then one inlined variable corresponds to one abstract variable. + Cleansed = IV.first; + auto I = AbstractVariables.find(Cleansed); + if (I != AbstractVariables.end()) + return I->second.get(); + return nullptr; +} + +DbgVariable *DwarfDebug::getExistingAbstractVariable(InlinedVariable IV) { + const DILocalVariable *Cleansed; + return getExistingAbstractVariable(IV, Cleansed); +} + +void DwarfDebug::createAbstractVariable(const DILocalVariable *Var, + LexicalScope *Scope) { + auto AbsDbgVariable = make_unique<DbgVariable>(Var, /* IA */ nullptr, this); + InfoHolder.addScopeVariable(Scope, AbsDbgVariable.get()); + AbstractVariables[Var] = std::move(AbsDbgVariable); +} + +void DwarfDebug::ensureAbstractVariableIsCreated(InlinedVariable IV, + const MDNode *ScopeNode) { + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) + return; + + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope( + cast<DILocalScope>(ScopeNode))); +} + +void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped( + InlinedVariable IV, const MDNode *ScopeNode) { + const DILocalVariable *Cleansed = nullptr; + if (getExistingAbstractVariable(IV, Cleansed)) + return; + + if (LexicalScope *Scope = + LScopes.findAbstractScope(cast_or_null<DILocalScope>(ScopeNode))) + createAbstractVariable(Cleansed, Scope); +} + +// Collect variable information from side table maintained by MMI. +void DwarfDebug::collectVariableInfoFromMMITable( + DenseSet<InlinedVariable> &Processed) { + for (const auto &VI : MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) && + "Expected inlined-at fields to agree"); + + InlinedVariable Var(VI.Var, VI.Loc->getInlinedAt()); + Processed.insert(Var); + LexicalScope *Scope = LScopes.findLexicalScope(VI.Loc); + + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + ensureAbstractVariableIsCreatedIfScoped(Var, Scope->getScopeNode()); + auto RegVar = make_unique<DbgVariable>(Var.first, Var.second, this); + RegVar->initializeMMI(VI.Expr, VI.Slot); + if (InfoHolder.addScopeVariable(Scope, RegVar.get())) + ConcreteVariables.push_back(std::move(RegVar)); + } +} + +// Get .debug_loc entry for the instruction range starting at MI. +static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) { + const DIExpression *Expr = MI->getDebugExpression(); + + assert(MI->getNumOperands() == 4); + if (MI->getOperand(0).isReg()) { + MachineLocation MLoc; + // If the second operand is an immediate, this is a + // register-indirect address. 
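+    // For example (operands shown for illustration only), a
+    //   DBG_VALUE %reg, 8, !var, !expr
+    // describes the memory at [%reg + 8], whereas a non-immediate second
+    // operand means the value lives in %reg itself.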
+    if (!MI->getOperand(1).isImm())
+      MLoc.set(MI->getOperand(0).getReg());
+    else
+      MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+    return DebugLocEntry::Value(Expr, MLoc);
+  }
+  if (MI->getOperand(0).isImm())
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
+  if (MI->getOperand(0).isFPImm())
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
+  if (MI->getOperand(0).isCImm())
+    return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
+
+  llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+}
+
+/// Determine whether two variable pieces overlap.
+static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
+  if (!P1->isBitPiece() || !P2->isBitPiece())
+    return true;
+  unsigned l1 = P1->getBitPieceOffset();
+  unsigned l2 = P2->getBitPieceOffset();
+  unsigned r1 = l1 + P1->getBitPieceSize();
+  unsigned r2 = l2 + P2->getBitPieceSize();
+  // True where [l1,r1[ and [l2,r2[ overlap.
+  return (l1 < r2) && (l2 < r1);
+}
+
+/// Build the location list for all DBG_VALUEs in the function that
+/// describe the same variable. If the ranges of several independent
+/// pieces of the same variable overlap partially, split them up and
+/// combine the ranges. The resulting DebugLocEntries will have
+/// strictly monotonically increasing begin addresses and will never
+/// overlap.
+//
+// Input:
+//
+//   Ranges History [var, loc, piece ofs size]
+// 0 |      [x, (reg0, piece 0, 32)]
+// 1 | |    [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// 2 | |    ...
+// 3   |    [clobber reg0]
+// 4        [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+//                                     x.
+//
+// Output:
+//
+// [0-1]    [x, (reg0, piece 0, 32)]
+// [1-3]    [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
+// [3-4]    [x, (reg1, piece 32, 32)]
+// [4- ]    [x, (mem, piece 0, 64)]
+void
+DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+                              const DbgValueHistoryMap::InstrRanges &Ranges) {
+  SmallVector<DebugLocEntry::Value, 4> OpenRanges;
+
+  for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
+    const MachineInstr *Begin = I->first;
+    const MachineInstr *End = I->second;
+    assert(Begin->isDebugValue() && "Invalid History entry");
+
+    // Check if a variable is inaccessible in this range.
+    if (Begin->getNumOperands() > 1 &&
+        Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
+      OpenRanges.clear();
+      continue;
+    }
+
+    // If this piece overlaps with any open ranges, truncate them.
+    const DIExpression *DIExpr = Begin->getDebugExpression();
+    auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
+                               [&](DebugLocEntry::Value R) {
+      return piecesOverlap(DIExpr, R.getExpression());
+    });
+    OpenRanges.erase(Last, OpenRanges.end());
+
+    const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
+    assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+
+    const MCSymbol *EndLabel;
+    if (End != nullptr)
+      EndLabel = getLabelAfterInsn(End);
+    else if (std::next(I) == Ranges.end())
+      EndLabel = Asm->getFunctionEnd();
+    else
+      EndLabel = getLabelBeforeInsn(std::next(I)->first);
+    assert(EndLabel && "Forgot label after instruction ending a range!");
+
+    DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+
+    auto Value = getDebugLocValue(Begin);
+    DebugLocEntry Loc(StartLabel, EndLabel, Value);
+    bool couldMerge = false;
+
+    // If this is a piece, it may belong to the current DebugLocEntry.
+    if (DIExpr->isBitPiece()) {
+      // Add this value to the list of open ranges.
+ OpenRanges.push_back(Value); + + // Attempt to add the piece to the last entry. + if (!DebugLoc.empty()) + if (DebugLoc.back().MergeValues(Loc)) + couldMerge = true; + } + + if (!couldMerge) { + // Need to add a new DebugLocEntry. Add all values from still + // valid non-overlapping pieces. + if (OpenRanges.size()) + Loc.addValues(OpenRanges); + + DebugLoc.push_back(std::move(Loc)); + } + + // Attempt to coalesce the ranges of two otherwise identical + // DebugLocEntries. + auto CurEntry = DebugLoc.rbegin(); + DEBUG({ + dbgs() << CurEntry->getValues().size() << " Values:\n"; + for (auto &Value : CurEntry->getValues()) + Value.getExpression()->dump(); + dbgs() << "-----\n"; + }); + + auto PrevEntry = std::next(CurEntry); + if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry)) + DebugLoc.pop_back(); + } +} + +DbgVariable *DwarfDebug::createConcreteVariable(LexicalScope &Scope, + InlinedVariable IV) { + ensureAbstractVariableIsCreatedIfScoped(IV, Scope.getScopeNode()); + ConcreteVariables.push_back( + make_unique<DbgVariable>(IV.first, IV.second, this)); + InfoHolder.addScopeVariable(&Scope, ConcreteVariables.back().get()); + return ConcreteVariables.back().get(); +} + +// Find variables for each lexical scope. +void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU, + const DISubprogram *SP, + DenseSet<InlinedVariable> &Processed) { + // Grab the variable info that was squirreled away in the MMI side-table. + collectVariableInfoFromMMITable(Processed); + + for (const auto &I : DbgValues) { + InlinedVariable IV = I.first; + if (Processed.count(IV)) + continue; + + // Instruction ranges, specifying where IV is accessible. + const auto &Ranges = I.second; + if (Ranges.empty()) + continue; + + LexicalScope *Scope = nullptr; + if (const DILocation *IA = IV.second) + Scope = LScopes.findInlinedScope(IV.first->getScope(), IA); + else + Scope = LScopes.findLexicalScope(IV.first->getScope()); + // If variable scope is not found then skip this variable. + if (!Scope) + continue; + + Processed.insert(IV); + DbgVariable *RegVar = createConcreteVariable(*Scope, IV); + + const MachineInstr *MInsn = Ranges.front().first; + assert(MInsn->isDebugValue() && "History must begin with debug value"); + + // Check if the first DBG_VALUE is valid for the rest of the function. + if (Ranges.size() == 1 && Ranges.front().second == nullptr) { + RegVar->initializeDbgValue(MInsn); + continue; + } + + // Handle multiple DBG_VALUE instructions describing one variable. + DebugLocStream::ListBuilder List(DebugLocs, TheCU, *Asm, *RegVar, *MInsn); + + // Build the location list for this variable. + SmallVector<DebugLocEntry, 8> Entries; + buildLocationList(Entries, Ranges); + + // If the variable has an DIBasicType, extract it. Basic types cannot have + // unique identifiers, so don't bother resolving the type with the + // identifier map. + const DIBasicType *BT = dyn_cast<DIBasicType>( + static_cast<const Metadata *>(IV.first->getType())); + + // Finalize the entry by lowering it into a DWARF bytestream. + for (auto &Entry : Entries) + Entry.finalize(*Asm, List, BT); + } + + // Collect info for variables that were optimized out. + for (const DILocalVariable *DV : SP->getVariables()) { + if (Processed.insert(InlinedVariable(DV, nullptr)).second) + if (LexicalScope *Scope = LScopes.findLexicalScope(DV->getScope())) + createConcreteVariable(*Scope, InlinedVariable(DV, nullptr)); + } +} + +// Return Label preceding the instruction. 
+MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; +} + +// Return Label immediately following the instruction. +MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { + return LabelsAfterInsn.lookup(MI); +} + +// Process beginning of an instruction. +void DwarfDebug::beginInstruction(const MachineInstr *MI) { + assert(CurMI == nullptr); + CurMI = MI; + // Check if source location changes, but ignore DBG_VALUE locations. + if (!MI->isDebugValue()) { + DebugLoc DL = MI->getDebugLoc(); + if (DL != PrevInstLoc) { + if (DL) { + unsigned Flags = 0; + PrevInstLoc = DL; + if (DL == PrologEndLoc) { + Flags |= DWARF2_FLAG_PROLOGUE_END; + PrologEndLoc = DebugLoc(); + Flags |= DWARF2_FLAG_IS_STMT; + } + if (DL.getLine() != + Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine()) + Flags |= DWARF2_FLAG_IS_STMT; + + const MDNode *Scope = DL.getScope(); + recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags); + } else if (UnknownLocations) { + PrevInstLoc = DL; + recordSourceLine(0, 0, nullptr, 0); + } + } + } + + // Insert labels where requested. + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsBeforeInsn.find(MI); + + // No label needed. + if (I == LabelsBeforeInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +// Process end of an instruction. +void DwarfDebug::endInstruction() { + assert(CurMI != nullptr); + // Don't create a new label after DBG_VALUE instructions. + // They don't generate code. + if (!CurMI->isDebugValue()) + PrevLabel = nullptr; + + DenseMap<const MachineInstr *, MCSymbol *>::iterator I = + LabelsAfterInsn.find(CurMI); + CurMI = nullptr; + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(PrevLabel); + } + I->second = PrevLabel; +} + +// Each LexicalScope has first instruction and last instruction to mark +// beginning and end of a scope respectively. Create an inverse map that list +// scopes starts (and ends) with an instruction. One instruction may start (or +// end) multiple scopes. Ignore scopes that are not reachable. +void DwarfDebug::identifyScopeMarkers() { + SmallVector<LexicalScope *, 4> WorkList; + WorkList.push_back(LScopes.getCurrentFunctionScope()); + while (!WorkList.empty()) { + LexicalScope *S = WorkList.pop_back_val(); + + const SmallVectorImpl<LexicalScope *> &Children = S->getChildren(); + if (!Children.empty()) + WorkList.append(Children.begin(), Children.end()); + + if (S->isAbstractScope()) + continue; + + for (const InsnRange &R : S->getRanges()) { + assert(R.first && "InsnRange does not have first instruction!"); + assert(R.second && "InsnRange does not have second instruction!"); + requestLabelBeforeInsn(R.first); + requestLabelAfterInsn(R.second); + } + } +} + +static DebugLoc findPrologueEndLoc(const MachineFunction *MF) { + // First known non-DBG_VALUE and non-frame setup location marks + // the beginning of the function body. 
+ for (const auto &MBB : *MF) + for (const auto &MI : MBB) + if (!MI.isDebugValue() && !MI.getFlag(MachineInstr::FrameSetup) && + MI.getDebugLoc()) + return MI.getDebugLoc(); + return DebugLoc(); +} + +// Gather pre-function debug information. Assumes being called immediately +// after the function entry point has been emitted. +void DwarfDebug::beginFunction(const MachineFunction *MF) { + CurFn = MF; + + // If there's no debug info for the function we're not going to do anything. + if (!MMI->hasDebugInfo()) + return; + + auto DI = MF->getFunction()->getSubprogram(); + if (!DI) + return; + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. + LScopes.initialize(*MF); + if (LScopes.empty()) + return; + + assert(DbgValues.empty() && "DbgValues map wasn't cleaned!"); + + // Make sure that each lexical scope will have a begin/end label. + identifyScopeMarkers(); + + // Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + // FnScope->getScopeNode() and DI->second should represent the same function, + // though they may not be the same MDNode due to inline functions merged in + // LTO where the debug info metadata still differs (either due to distinct + // written differences - two versions of a linkonce_odr function + // written/copied into two separate files, or some sub-optimal metadata that + // isn't structurally identical (see: file path/name info from clang, which + // includes the directory of the cpp file being built, even when the file name + // is absolute (such as an <> lookup header))) + DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert(TheCU && "Unable to find compile unit!"); + if (Asm->OutStreamer->hasRawTextSupport()) + // Use a single line table if we are generating assembly. + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); + else + Asm->OutStreamer->getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + + // Calculate history for local variables. + calculateDbgValueHistory(MF, Asm->MF->getSubtarget().getRegisterInfo(), + DbgValues); + + // Request labels for the full history. + for (const auto &I : DbgValues) { + const auto &Ranges = I.second; + if (Ranges.empty()) + continue; + + // The first mention of a function argument gets the CurrentFnBegin + // label, so arguments are visible when breaking at function entry. + const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable(); + if (DIVar->isParameter() && + getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) { + LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin(); + if (Ranges.front().first->getDebugExpression()->isBitPiece()) { + // Mark all non-overlapping initial pieces. + for (auto I = Ranges.begin(); I != Ranges.end(); ++I) { + const DIExpression *Piece = I->first->getDebugExpression(); + if (std::all_of(Ranges.begin(), I, + [&](DbgValueHistoryMap::InstrRange Pred) { + return !piecesOverlap(Piece, Pred.first->getDebugExpression()); + })) + LabelsBeforeInsn[I->first] = Asm->getFunctionBegin(); + else + break; + } + } + } + + for (const auto &Range : Ranges) { + requestLabelBeforeInsn(Range.first); + if (Range.second) + requestLabelAfterInsn(Range.second); + } + } + + PrevInstLoc = DebugLoc(); + PrevLabel = Asm->getFunctionBegin(); + + // Record beginning of function. 
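+  // That is, find where the prologue ends and emit an initial line table
+  // entry at the function's scope line.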
+ PrologEndLoc = findPrologueEndLoc(MF); + if (DILocation *L = PrologEndLoc) { + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + auto *SP = L->getInlinedAtScope()->getSubprogram(); + recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT); + } +} + +// Gather and emit post-function debug information. +void DwarfDebug::endFunction(const MachineFunction *MF) { + assert(CurFn == MF && + "endFunction should be called with the same function as beginFunction"); + + if (!MMI->hasDebugInfo() || LScopes.empty() || + !MF->getFunction()->getSubprogram()) { + // If we don't have a lexical scope for this function then there will + // be a hole in the range information. Keep note of this by setting the + // previously used section to nullptr. + PrevCU = nullptr; + CurFn = nullptr; + return; + } + + // Set DwarfDwarfCompileUnitID in MCContext to default value. + Asm->OutStreamer->getContext().setDwarfCompileUnitID(0); + + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + auto *SP = cast<DISubprogram>(FnScope->getScopeNode()); + DwarfCompileUnit &TheCU = *SPMap.lookup(SP); + + DenseSet<InlinedVariable> ProcessedVars; + collectVariableInfo(TheCU, SP, ProcessedVars); + + // Add the range of this function to the list of ranges for the CU. + TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd())); + + // Under -gmlt, skip building the subprogram if there are no inlined + // subroutines inside it. + if (TheCU.getCUNode()->getEmissionKind() == DIBuilder::LineTablesOnly && + LScopes.getAbstractScopesList().empty() && !IsDarwin) { + assert(InfoHolder.getScopeVariables().empty()); + assert(DbgValues.empty()); + // FIXME: This wouldn't be true in LTO with a -g (with inlining) CU followed + // by a -gmlt CU. Add a test and remove this assertion. + assert(AbstractVariables.empty()); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); + PrevLabel = nullptr; + CurFn = nullptr; + return; + } + +#ifndef NDEBUG + size_t NumAbstractScopes = LScopes.getAbstractScopesList().size(); +#endif + // Construct abstract scopes. + for (LexicalScope *AScope : LScopes.getAbstractScopesList()) { + auto *SP = cast<DISubprogram>(AScope->getScopeNode()); + // Collect info for variables that were optimized out. + for (const DILocalVariable *DV : SP->getVariables()) { + if (!ProcessedVars.insert(InlinedVariable(DV, nullptr)).second) + continue; + ensureAbstractVariableIsCreated(InlinedVariable(DV, nullptr), + DV->getScope()); + assert(LScopes.getAbstractScopesList().size() == NumAbstractScopes + && "ensureAbstractVariableIsCreated inserted abstract scopes"); + } + constructAbstractSubprogramScopeDIE(AScope); + } + + TheCU.constructSubprogramScopeDIE(FnScope); + if (auto *SkelCU = TheCU.getSkeleton()) + if (!LScopes.getAbstractScopesList().empty()) + SkelCU->constructSubprogramScopeDIE(FnScope); + + // Clear debug info + // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the + // DbgVariables except those that are also in AbstractVariables (since they + // can be used cross-function) + InfoHolder.getScopeVariables().clear(); + DbgValues.clear(); + LabelsBeforeInsn.clear(); + LabelsAfterInsn.clear(); + PrevLabel = nullptr; + CurFn = nullptr; +} + +// Register a source line with debug info. Returns the unique label that was +// emitted and which provides correspondence to the source line list. 
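+// When emitting assembly this lowers to a .loc directive, roughly
+// ".loc 1 42 7 prologue_end" for file 1, line 42, column 7 at the end of
+// the prologue.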
+void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, + unsigned Flags) { + StringRef Fn; + StringRef Dir; + unsigned Src = 1; + unsigned Discriminator = 0; + if (auto *Scope = cast_or_null<DIScope>(S)) { + Fn = Scope->getFilename(); + Dir = Scope->getDirectory(); + if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope)) + Discriminator = LBF->getDiscriminator(); + + unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID(); + Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID]) + .getOrCreateSourceID(Fn, Dir); + } + Asm->OutStreamer->EmitDwarfLocDirective(Src, Line, Col, Flags, 0, + Discriminator, Fn); +} + +//===----------------------------------------------------------------------===// +// Emit Methods +//===----------------------------------------------------------------------===// + +// Emit the debug info section. +void DwarfDebug::emitDebugInfo() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitUnits(/* UseOffsets */ false); +} + +// Emit the abbreviation section. +void DwarfDebug::emitAbbreviations() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + + Holder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); +} + +void DwarfDebug::emitAccel(DwarfAccelTable &Accel, MCSection *Section, + StringRef TableName) { + Accel.FinalizeTable(Asm, TableName); + Asm->OutStreamer->SwitchSection(Section); + + // Emit the full data. + Accel.emit(Asm, Section->getBeginSymbol(), this); +} + +// Emit visible names into a hashed accelerator table section. +void DwarfDebug::emitAccelNames() { + emitAccel(AccelNames, Asm->getObjFileLowering().getDwarfAccelNamesSection(), + "Names"); +} + +// Emit objective C classes and categories into a hashed accelerator table +// section. +void DwarfDebug::emitAccelObjC() { + emitAccel(AccelObjC, Asm->getObjFileLowering().getDwarfAccelObjCSection(), + "ObjC"); +} + +// Emit namespace dies into a hashed accelerator table. +void DwarfDebug::emitAccelNamespaces() { + emitAccel(AccelNamespace, + Asm->getObjFileLowering().getDwarfAccelNamespaceSection(), + "namespac"); +} + +// Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + emitAccel(AccelTypes, Asm->getObjFileLowering().getDwarfAccelTypesSection(), + "types"); +} + +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. +// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. + +/// computeIndexValue - Compute the gdb index value for the DIE and CU. +static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU, + const DIE *Die) { + dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; + + // We could have a specification DIE that has our most of our knowledge, + // look for that now. 
+ if (DIEValue SpecVal = Die->findAttribute(dwarf::DW_AT_specification)) { + DIE &SpecDIE = SpecVal.getDIEEntry().getEntry(); + if (SpecDIE.findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + } else if (Die->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + + switch (Die->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + return dwarf::PubIndexEntryDescriptor( + dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus + ? dwarf::GIEL_STATIC + : dwarf::GIEL_EXTERNAL); + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_subrange_type: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); + case dwarf::DW_TAG_namespace: + return dwarf::GIEK_TYPE; + case dwarf::DW_TAG_subprogram: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); + case dwarf::DW_TAG_variable: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); + case dwarf::DW_TAG_enumerator: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, + dwarf::GIEL_STATIC); + default: + return dwarf::GIEK_NONE; + } +} + +/// emitDebugPubNames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubNames(bool GnuStyle) { + MCSection *PSec = GnuStyle + ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() + : Asm->getObjFileLowering().getDwarfPubNamesSection(); + + emitDebugPubSection(GnuStyle, PSec, "Names", + &DwarfCompileUnit::getGlobalNames); +} + +void DwarfDebug::emitDebugPubSection( + bool GnuStyle, MCSection *PSec, StringRef Name, + const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const) { + for (const auto &NU : CUMap) { + DwarfCompileUnit *TheU = NU.second; + + const auto &Globals = (TheU->*Accessor)(); + + if (Globals.empty()) + continue; + + if (auto *Skeleton = TheU->getSkeleton()) + TheU = Skeleton; + + // Start the dwarf pubnames section. + Asm->OutStreamer->SwitchSection(PSec); + + // Emit the header. + Asm->OutStreamer->AddComment("Length of Public " + Name + " Info"); + MCSymbol *BeginLabel = Asm->createTempSymbol("pub" + Name + "_begin"); + MCSymbol *EndLabel = Asm->createTempSymbol("pub" + Name + "_end"); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + + Asm->OutStreamer->EmitLabel(BeginLabel); + + Asm->OutStreamer->AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); + + Asm->OutStreamer->AddComment("Offset of Compilation Unit Info"); + Asm->emitDwarfSymbolReference(TheU->getLabelBegin()); + + Asm->OutStreamer->AddComment("Compilation Unit Length"); + Asm->EmitInt32(TheU->getLength()); + + // Emit the pubnames for this compilation unit. 
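+    // Each entry below is a 4-byte DIE offset, then (for GNU-style tables) a
+    // descriptor byte encoding kind and linkage, then the NUL-terminated
+    // name; a 4-byte zero terminates the set.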
+ for (const auto &GI : Globals) { + const char *Name = GI.getKeyData(); + const DIE *Entity = GI.second; + + Asm->OutStreamer->AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheU, Entity); + Asm->OutStreamer->AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + + Asm->OutStreamer->AddComment("External Name"); + Asm->OutStreamer->EmitBytes(StringRef(Name, GI.getKeyLength() + 1)); + } + + Asm->OutStreamer->AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer->EmitLabel(EndLabel); + } +} + +void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { + MCSection *PSec = GnuStyle + ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); + + emitDebugPubSection(GnuStyle, PSec, "Types", + &DwarfCompileUnit::getGlobalTypes); +} + +// Emit visible names into a debug str section. +void DwarfDebug::emitDebugStr() { + DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); +} + +void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry) { + auto &&Comments = DebugLocs.getComments(Entry); + auto Comment = Comments.begin(); + auto End = Comments.end(); + for (uint8_t Byte : DebugLocs.getBytes(Entry)) + Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : ""); +} + +static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, + ByteStreamer &Streamer, + const DebugLocEntry::Value &Value, + unsigned PieceOffsetInBits) { + DebugLocDwarfExpression DwarfExpr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + // Regular entry. + if (Value.isInt()) { + if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char)) + DwarfExpr.AddSignedConstant(Value.getInt()); + else + DwarfExpr.AddUnsignedConstant(Value.getInt()); + } else if (Value.isLocation()) { + MachineLocation Loc = Value.getLoc(); + const DIExpression *Expr = Value.getExpression(); + if (!Expr || !Expr->getNumElements()) + // Regular entry. + AP.EmitDwarfRegOp(Streamer, Loc); + else { + // Complex address entry. + if (Loc.getOffset()) { + DwarfExpr.AddMachineRegIndirect(Loc.getReg(), Loc.getOffset()); + DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(), + PieceOffsetInBits); + } else + DwarfExpr.AddMachineRegExpression(Expr, Loc.getReg(), + PieceOffsetInBits); + } + } + // else ... ignore constant fp. There is not any good way to + // to represent them here in dwarf. + // FIXME: ^ +} + +void DebugLocEntry::finalize(const AsmPrinter &AP, + DebugLocStream::ListBuilder &List, + const DIBasicType *BT) { + DebugLocStream::EntryBuilder Entry(List, Begin, End); + BufferByteStreamer Streamer = Entry.getStreamer(); + const DebugLocEntry::Value &Value = Values[0]; + if (Value.isBitPiece()) { + // Emit all pieces that belong to the same variable and range. 
+ assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) { + return P.isBitPiece(); + }) && "all values are expected to be pieces"); + assert(std::is_sorted(Values.begin(), Values.end()) && + "pieces are expected to be sorted"); + + unsigned Offset = 0; + for (auto Piece : Values) { + const DIExpression *Expr = Piece.getExpression(); + unsigned PieceOffset = Expr->getBitPieceOffset(); + unsigned PieceSize = Expr->getBitPieceSize(); + assert(Offset <= PieceOffset && "overlapping or duplicate pieces"); + if (Offset < PieceOffset) { + // The DWARF spec seriously mandates pieces with no locations for gaps. + DebugLocDwarfExpression Expr(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion(), + Streamer); + Expr.AddOpPiece(PieceOffset-Offset, 0); + Offset += PieceOffset-Offset; + } + Offset += PieceSize; + + emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset); + } + } else { + assert(Values.size() == 1 && "only pieces may have >1 value"); + emitDebugLocValue(AP, BT, Streamer, Value, 0); + } +} + +void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) { + // Emit the size. + Asm->OutStreamer->AddComment("Loc expr size"); + Asm->EmitInt16(DebugLocs.getBytes(Entry).size()); + + // Emit the entry. + APByteStreamer Streamer(*Asm); + emitDebugLocEntry(Streamer, Entry); +} + +// Emit locations into the debug loc section. +void DwarfDebug::emitDebugLoc() { + // Start the dwarf loc section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocSection()); + unsigned char Size = Asm->getDataLayout().getPointerSize(); + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + const DwarfCompileUnit *CU = List.CU; + for (const auto &Entry : DebugLocs.getEntries(List)) { + // Set up the range. This range is relative to the entry point of the + // compile unit. This is a hard coded 0 for low_pc when we're emitting + // ranges, or the DW_AT_low_pc on the compile unit otherwise. + if (auto *Base = CU->getBaseAddress()) { + Asm->EmitLabelDifference(Entry.BeginSym, Base, Size); + Asm->EmitLabelDifference(Entry.EndSym, Base, Size); + } else { + Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size); + Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size); + } + + emitDebugLocEntryLocation(Entry); + } + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } +} + +void DwarfDebug::emitDebugLocDWO() { + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLocDWOSection()); + for (const auto &List : DebugLocs.getLists()) { + Asm->OutStreamer->EmitLabel(List.Label); + for (const auto &Entry : DebugLocs.getEntries(List)) { + // Just always use start_length for now - at least that's one address + // rather than two. We could get fancier and try to, say, reuse an + // address we know we've emitted elsewhere (the start of the function? + // The start of the CU or CU subrange that encloses this range?) + Asm->EmitInt8(dwarf::DW_LLE_start_length_entry); + unsigned idx = AddrPool.getIndex(Entry.BeginSym); + Asm->EmitULEB128(idx); + Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4); + + emitDebugLocEntryLocation(Entry); + } + Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry); + } +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. +void DwarfDebug::emitDebugARanges() { + // Provides a unique id per text section. 
+ MapVector<MCSection *, SmallVector<SymbolCU, 8>> SectionMap; + + // Filter labels by section. + for (const SymbolCU &SCU : ArangeLabels) { + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[nullptr].push_back(SCU); + } + } + + // Add terminating symbols for each section. + for (const auto &I : SectionMap) { + MCSection *Section = I.first; + MCSymbol *Sym = nullptr; + + if (Section) + Sym = Asm->OutStreamer->endSection(Section); + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(nullptr, Sym)); + } + + DenseMap<DwarfCompileUnit *, std::vector<ArangeSpan>> Spans; + + for (auto &I : SectionMap) { + const MCSection *Section = I.first; + SmallVector<SymbolCU, 8> &List = I.second; + if (List.size() < 2) + continue; + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (!Section) { + for (const SymbolCU &Cur : List) { + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = nullptr; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + continue; + } + + // Sort the symbols by offset within the section. + std::sort(List.begin(), List.end(), + [&](const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + return false; + if (IB == 0) + return true; + return IA < IB; + }); + + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1, e = List.size(); n < e; n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { + ArangeSpan Span; + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; + } + } + } + + // Start the dwarf aranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfARangesSection()); + + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + + // Build a list of CUs used. + std::vector<DwarfCompileUnit *> CUs; + for (const auto &it : Spans) { + DwarfCompileUnit *CU = it.first; + CUs.push_back(CU); + } + + // Sort the CU list (again, to ensure consistent output order). + std::sort(CUs.begin(), CUs.end(), [](const DwarfUnit *A, const DwarfUnit *B) { + return A->getUniqueID() < B->getUniqueID(); + }); + + // Emit an arange table for each CU we used. + for (DwarfCompileUnit *CU : CUs) { + std::vector<ArangeSpan> &List = Spans[CU]; + + // Describe the skeleton CU's offset and length, not the dwo file's. + if (auto *Skel = CU->getSkeleton()) + CU = Skel; + + // Emit size of content not including length itself. + unsigned ContentSize = + sizeof(int16_t) + // DWARF ARange version number + sizeof(int32_t) + // Offset of CU in the .debug_info section + sizeof(int8_t) + // Pointer Size (in bytes) + sizeof(int8_t); // Segment Size (in bytes) + + unsigned TupleSize = PtrSize * 2; + + // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. 
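+    // For example, with 8-byte pointers the tuple is 16 bytes, and the
+    // 12-byte header (4-byte length, 2-byte version, 4-byte CU offset,
+    // 1-byte address size, 1-byte segment size) gets 4 bytes of 0xff padding
+    // so the first tuple starts on a 16-byte boundary.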
+ unsigned Padding = + OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize); + + ContentSize += Padding; + ContentSize += (List.size() + 1) * TupleSize; + + // For each compile unit, write the list of spans it covers. + Asm->OutStreamer->AddComment("Length of ARange Set"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer->AddComment("DWARF Arange version number"); + Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); + Asm->OutStreamer->AddComment("Offset Into Debug Info Section"); + Asm->emitDwarfSymbolReference(CU->getLabelBegin()); + Asm->OutStreamer->AddComment("Address Size (in bytes)"); + Asm->EmitInt8(PtrSize); + Asm->OutStreamer->AddComment("Segment Size (in bytes)"); + Asm->EmitInt8(0); + + Asm->OutStreamer->EmitFill(Padding, 0xff); + + for (const ArangeSpan &Span : List) { + Asm->EmitLabelReference(Span.Start, PtrSize); + + // Calculate the size as being from the span start to it's end. + if (Span.End) { + Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + } else { + // For symbols without an end marker (e.g. common), we + // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; + if (Size == 0) + Size = 1; + + Asm->OutStreamer->EmitIntValue(Size, PtrSize); + } + } + + Asm->OutStreamer->AddComment("ARange terminator"); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + Asm->OutStreamer->EmitIntValue(0, PtrSize); + } +} + +// Emit visible names into a debug ranges section. +void DwarfDebug::emitDebugRanges() { + // Start the dwarf ranges section. + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfRangesSection()); + + // Size for our labels. + unsigned char Size = Asm->getDataLayout().getPointerSize(); + + // Grab the specific ranges for the compile units in the module. + for (const auto &I : CUMap) { + DwarfCompileUnit *TheCU = I.second; + + if (auto *Skel = TheCU->getSkeleton()) + TheCU = Skel; + + // Iterate over the misc ranges for the compile units in the module. + for (const RangeSpanList &List : TheCU->getRangeLists()) { + // Emit our symbol so we can find the beginning of the range. + Asm->OutStreamer->EmitLabel(List.getSym()); + + for (const RangeSpan &Range : List.getRanges()) { + const MCSymbol *Begin = Range.getStart(); + const MCSymbol *End = Range.getEnd(); + assert(Begin && "Range without a begin symbol?"); + assert(End && "Range without an end symbol?"); + if (auto *Base = TheCU->getBaseAddress()) { + Asm->EmitLabelDifference(Begin, Base, Size); + Asm->EmitLabelDifference(End, Base, Size); + } else { + Asm->OutStreamer->EmitSymbolValue(Begin, Size); + Asm->OutStreamer->EmitSymbolValue(End, Size); + } + } + + // And terminate the list with two 0 values. + Asm->OutStreamer->EmitIntValue(0, Size); + Asm->OutStreamer->EmitIntValue(0, Size); + } + } +} + +// DWARF5 Experimental Separate Dwarf emitters. + +void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die, + std::unique_ptr<DwarfUnit> NewU) { + NewU->addString(Die, dwarf::DW_AT_GNU_dwo_name, + U.getCUNode()->getSplitDebugFilename()); + + if (!CompilationDir.empty()) + NewU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + addGnuPubAttributes(*NewU, Die); + + SkeletonHolder.addUnit(std::move(NewU)); +} + +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, +// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, +// DW_AT_addr_base, DW_AT_ranges_base. 
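+// The skeleton CU stays in the object file and points consumers at the
+// corresponding .dwo file via DW_AT_GNU_dwo_name and DW_AT_GNU_dwo_id.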
+DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) { + + auto OwnedUnit = make_unique<DwarfCompileUnit>( + CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder); + DwarfCompileUnit &NewCU = *OwnedUnit; + NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection()); + + NewCU.initStmtList(); + + initSkeletonUnit(CU, NewCU.getUnitDie(), std::move(OwnedUnit)); + + return NewCU; +} + +// Emit the .debug_info.dwo section for separated dwarf. This contains the +// compile units that would normally be in debug_info. +void DwarfDebug::emitDebugInfoDWO() { + assert(useSplitDwarf() && "No split dwarf debug info?"); + // Don't emit relocations into the dwo file. + InfoHolder.emitUnits(/* UseOffsets */ true); +} + +// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the +// abbreviations for the .debug_info.dwo section. +void DwarfDebug::emitDebugAbbrevDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + InfoHolder.emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection()); +} + +void DwarfDebug::emitDebugLineDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getDwarfLineDWOSection()); + SplitTypeUnitFileTable.Emit(*Asm->OutStreamer, MCDwarfLineTableParams()); +} + +// Emit the .debug_str.dwo section for separated dwarf. This contains the +// string section and is identical in format to traditional .debug_str +// sections. +void DwarfDebug::emitDebugStrDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + MCSection *OffSec = Asm->getObjFileLowering().getDwarfStrOffDWOSection(); + InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), + OffSec); +} + +MCDwarfDwoLineTable *DwarfDebug::getDwoLineTable(const DwarfCompileUnit &CU) { + if (!useSplitDwarf()) + return nullptr; + if (SingleCU) + SplitTypeUnitFileTable.setCompilationDir(CU.getCUNode()->getDirectory()); + return &SplitTypeUnitFileTable; +} + +uint64_t DwarfDebug::makeTypeSignature(StringRef Identifier) { + MD5 Hash; + Hash.update(Identifier); + // ... take the least significant 8 bytes and return those. Our MD5 + // implementation always returns its results in little endian, swap bytes + // appropriately. + MD5::MD5Result Result; + Hash.final(Result); + return support::endian::read64le(Result + 8); +} + +void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU, + StringRef Identifier, DIE &RefDie, + const DICompositeType *CTy) { + // Fast path if we're building some type units and one has already used the + // address pool we know we're going to throw away all this work anyway, so + // don't bother building dependent types. 
+ if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed()) + return; + + const DwarfTypeUnit *&TU = DwarfTypeUnits[CTy]; + if (TU) { + CU.addDIETypeSignature(RefDie, *TU); + return; + } + + bool TopLevelType = TypeUnitsUnderConstruction.empty(); + AddrPool.resetUsedFlag(); + + auto OwnedUnit = make_unique<DwarfTypeUnit>( + InfoHolder.getUnits().size() + TypeUnitsUnderConstruction.size(), CU, Asm, + this, &InfoHolder, getDwoLineTable(CU)); + DwarfTypeUnit &NewTU = *OwnedUnit; + DIE &UnitDie = NewTU.getUnitDie(); + TU = &NewTU; + TypeUnitsUnderConstruction.push_back( + std::make_pair(std::move(OwnedUnit), CTy)); + + NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + CU.getLanguage()); + + uint64_t Signature = makeTypeSignature(Identifier); + NewTU.setTypeSignature(Signature); + + if (useSplitDwarf()) + NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection()); + else { + CU.applyStmtList(UnitDie); + NewTU.initSection( + Asm->getObjFileLowering().getDwarfTypesSection(Signature)); + } + + NewTU.setType(NewTU.createTypeDIE(CTy)); + + if (TopLevelType) { + auto TypeUnitsToAdd = std::move(TypeUnitsUnderConstruction); + TypeUnitsUnderConstruction.clear(); + + // Types referencing entries in the address table cannot be placed in type + // units. + if (AddrPool.hasBeenUsed()) { + + // Remove all the types built while building this type. + // This is pessimistic as some of these types might not be dependent on + // the type that used an address. + for (const auto &TU : TypeUnitsToAdd) + DwarfTypeUnits.erase(TU.second); + + // Construct this type in the CU directly. + // This is inefficient because all the dependent types will be rebuilt + // from scratch, including building them in type units, discovering that + // they depend on addresses, throwing them out and rebuilding them. + CU.constructTypeDIE(RefDie, cast<DICompositeType>(CTy)); + return; + } + + // If the type wasn't dependent on fission addresses, finish adding the type + // and all its dependent types. + for (auto &TU : TypeUnitsToAdd) + InfoHolder.addUnit(std::move(TU.first)); + } + CU.addDIETypeSignature(RefDie, NewTU); +} + +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. 
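+// Each of the addAccel* helpers below is a no-op unless useDwarfAccelTables()
+// is enabled.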
+void DwarfDebug::addAccelName(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNames.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +} + +void DwarfDebug::addAccelObjC(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelObjC.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +} + +void DwarfDebug::addAccelNamespace(StringRef Name, const DIE &Die) { + if (!useDwarfAccelTables()) + return; + AccelNamespace.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +} + +void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) { + if (!useDwarfAccelTables()) + return; + AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h new file mode 100644 index 0000000..4c613a9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -0,0 +1,617 @@ +//===-- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework ------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFDEBUG_H + +#include "AsmPrinterHandler.h" +#include "DbgValueHistoryCalculator.h" +#include "DebugLocStream.h" +#include "DwarfAccelTable.h" +#include "DwarfFile.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Target/TargetOptions.h" +#include <memory> + +namespace llvm { + +class AsmPrinter; +class ByteStreamer; +class ConstantInt; +class ConstantFP; +class DebugLocEntry; +class DwarfCompileUnit; +class DwarfDebug; +class DwarfTypeUnit; +class DwarfUnit; +class MachineModuleInfo; + +//===----------------------------------------------------------------------===// +/// This class is used to track local variable information. +/// +/// Variables can be created from allocas, in which case they're generated from +/// the MMI table. Such variables can have multiple expressions and frame +/// indices. The \a Expr and \a FrameIndices array must match. +/// +/// Variables can be created from \c DBG_VALUE instructions. Those whose +/// location changes over time use \a DebugLocListIndex, while those with a +/// single instruction use \a MInsn and (optionally) a single entry of \a Expr. +/// +/// Variables that have been optimized out use none of these fields. +class DbgVariable { + const DILocalVariable *Var; /// Variable Descriptor. + const DILocation *IA; /// Inlined at location. + SmallVector<const DIExpression *, 1> Expr; /// Complex address. + DIE *TheDIE = nullptr; /// Variable DIE. + unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs. 
+ const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction. + SmallVector<int, 1> FrameIndex; /// Frame index. + DwarfDebug *DD; + +public: + /// Construct a DbgVariable. + /// + /// Creates a variable without any DW_AT_location. Call \a initializeMMI() + /// for MMI entries, or \a initializeDbgValue() for DBG_VALUE instructions. + DbgVariable(const DILocalVariable *V, const DILocation *IA, DwarfDebug *DD) + : Var(V), IA(IA), DD(DD) {} + + /// Initialize from the MMI table. + void initializeMMI(const DIExpression *E, int FI) { + assert(Expr.empty() && "Already initialized?"); + assert(FrameIndex.empty() && "Already initialized?"); + assert(!MInsn && "Already initialized?"); + + assert((!E || E->isValid()) && "Expected valid expression"); + assert(~FI && "Expected valid index"); + + Expr.push_back(E); + FrameIndex.push_back(FI); + } + + /// Initialize from a DBG_VALUE instruction. + void initializeDbgValue(const MachineInstr *DbgValue) { + assert(Expr.empty() && "Already initialized?"); + assert(FrameIndex.empty() && "Already initialized?"); + assert(!MInsn && "Already initialized?"); + + assert(Var == DbgValue->getDebugVariable() && "Wrong variable"); + assert(IA == DbgValue->getDebugLoc()->getInlinedAt() && "Wrong inlined-at"); + + MInsn = DbgValue; + if (auto *E = DbgValue->getDebugExpression()) + if (E->getNumElements()) + Expr.push_back(E); + } + + // Accessors. + const DILocalVariable *getVariable() const { return Var; } + const DILocation *getInlinedAt() const { return IA; } + ArrayRef<const DIExpression *> getExpression() const { return Expr; } + void setDIE(DIE &D) { TheDIE = &D; } + DIE *getDIE() const { return TheDIE; } + void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; } + unsigned getDebugLocListIndex() const { return DebugLocListIndex; } + StringRef getName() const { return Var->getName(); } + const MachineInstr *getMInsn() const { return MInsn; } + ArrayRef<int> getFrameIndex() const { return FrameIndex; } + + void addMMIEntry(const DbgVariable &V) { + assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry"); + assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry"); + assert(V.Var == Var && "conflicting variable"); + assert(V.IA == IA && "conflicting inlined-at location"); + + assert(!FrameIndex.empty() && "Expected an MMI entry"); + assert(!V.FrameIndex.empty() && "Expected an MMI entry"); + assert(Expr.size() == FrameIndex.size() && "Mismatched expressions"); + assert(V.Expr.size() == V.FrameIndex.size() && "Mismatched expressions"); + + Expr.append(V.Expr.begin(), V.Expr.end()); + FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end()); + assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) { + return E && E->isBitPiece(); + }) && "conflicting locations for variable"); + } + + // Translate tag to proper Dwarf tag. + dwarf::Tag getTag() const { + // FIXME: Why don't we just infer this tag and store it all along? + if (Var->isParameter()) + return dwarf::DW_TAG_formal_parameter; + + return dwarf::DW_TAG_variable; + } + /// Return true if DbgVariable is artificial. 
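+  /// A variable is artificial if it was introduced by the compiler rather
+  /// than written in the source, or if its type is marked artificial.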
+ bool isArtificial() const { + if (Var->isArtificial()) + return true; + if (getType()->isArtificial()) + return true; + return false; + } + + bool isObjectPointer() const { + if (Var->isObjectPointer()) + return true; + if (getType()->isObjectPointer()) + return true; + return false; + } + + bool hasComplexAddress() const { + assert(MInsn && "Expected DBG_VALUE, not MMI variable"); + assert(FrameIndex.empty() && "Expected DBG_VALUE, not MMI variable"); + assert( + (Expr.empty() || (Expr.size() == 1 && Expr.back()->getNumElements())) && + "Invalid Expr for DBG_VALUE"); + return !Expr.empty(); + } + bool isBlockByrefVariable() const; + const DIType *getType() const; + +private: + /// Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const; +}; + + +/// Helper used to pair up a symbol and its DWARF compile unit. +struct SymbolCU { + SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + const MCSymbol *Sym; + DwarfCompileUnit *CU; +}; + +/// Collects and handles dwarf debug information. +class DwarfDebug : public AsmPrinterHandler { + /// Target of Dwarf emission. + AsmPrinter *Asm; + + /// Collected machine module information. + MachineModuleInfo *MMI; + + /// All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + /// Maps MDNode with its corresponding DwarfCompileUnit. + MapVector<const MDNode *, DwarfCompileUnit *> CUMap; + + /// Maps subprogram MDNode with its corresponding DwarfCompileUnit. + MapVector<const MDNode *, DwarfCompileUnit *> SPMap; + + /// Maps a CU DIE with its corresponding DwarfCompileUnit. + DenseMap<const DIE *, DwarfCompileUnit *> CUDieMap; + + /// List of all labels used in aranges generation. + std::vector<SymbolCU> ArangeLabels; + + /// Size of each symbol emitted (for those symbols that have a specific size). + DenseMap<const MCSymbol *, uint64_t> SymSize; + + LexicalScopes LScopes; + + /// Collection of abstract variables. + DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; + + /// Collection of DebugLocEntry. Stored in a linked list so that DIELocLists + /// can refer to them in spite of insertions into this list. + DebugLocStream DebugLocs; + + /// This is a collection of subprogram MDNodes that are processed to + /// create DIEs. + SmallPtrSet<const MDNode *, 16> ProcessedSPNodes; + + /// Maps instruction with label emitted before instruction. + DenseMap<const MachineInstr *, MCSymbol *> LabelsBeforeInsn; + + /// Maps instruction with label emitted after instruction. + DenseMap<const MachineInstr *, MCSymbol *> LabelsAfterInsn; + + /// History of DBG_VALUE and clobber instructions for each user + /// variable. Variables are listed in order of appearance. + DbgValueHistoryMap DbgValues; + + /// Previous instruction's location information. This is used to + /// determine label location to indicate scope boundries in dwarf + /// debug info. + DebugLoc PrevInstLoc; + MCSymbol *PrevLabel; + + /// This location indicates end of function prologue and beginning of + /// function body. + DebugLoc PrologEndLoc; + + /// If nonnull, stores the current machine function we're processing. + const MachineFunction *CurFn; + + /// If nonnull, stores the current machine instruction we're processing. + const MachineInstr *CurMI; + + /// If nonnull, stores the CU in which the previous subprogram was contained. 
+ const DwarfCompileUnit *PrevCU; + + /// As an optimization, there is no need to emit an entry in the directory + /// table for the same directory as DW_AT_comp_dir. + StringRef CompilationDir; + + /// Holder for the file specific debug information. + DwarfFile InfoHolder; + + /// Holders for the various debug information flags that we might need to + /// have exposed. See accessor functions below for description. + + /// Map from MDNodes for user-defined types to the type units that + /// describe them. + DenseMap<const MDNode *, const DwarfTypeUnit *> DwarfTypeUnits; + + SmallVector< + std::pair<std::unique_ptr<DwarfTypeUnit>, const DICompositeType *>, 1> + TypeUnitsUnderConstruction; + + /// Whether to emit the pubnames/pubtypes sections. + bool HasDwarfPubSections; + + /// Whether to use the GNU TLS opcode (instead of the standard opcode). + bool UseGNUTLSOpcode; + + /// Whether to emit DW_AT_[MIPS_]linkage_name. + bool UseLinkageNames; + + /// Version of dwarf we're emitting. + unsigned DwarfVersion; + + /// Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; + + /// DWARF5 Experimental Options + /// @{ + bool HasDwarfAccelTables; + bool HasSplitDwarf; + + /// Separated Dwarf Variables + /// In general these will all be for bits that are left in the + /// original object file, rather than things that are meant + /// to be in the .dwo sections. + + /// Holder for the skeleton information. + DwarfFile SkeletonHolder; + + /// Store file names for type units under fission in a line table + /// header that will be emitted into debug_line.dwo. + // FIXME: replace this with a map from comp_dir to table so that we + // can emit multiple tables during LTO each of which uses directory + // 0, referencing the comp_dir of all the type units that use it. + MCDwarfDwoLineTable SplitTypeUnitFileTable; + /// @} + + /// True iff there are multiple CUs in this module. + bool SingleCU; + bool IsDarwin; + + AddressPool AddrPool; + + DwarfAccelTable AccelNames; + DwarfAccelTable AccelObjC; + DwarfAccelTable AccelNamespace; + DwarfAccelTable AccelTypes; + + // Identify a debugger for "tuning" the debug info. + DebuggerKind DebuggerTuning; + + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); + + const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { + return InfoHolder.getUnits(); + } + + typedef DbgValueHistoryMap::InlinedVariable InlinedVariable; + + /// Find abstract variable associated with Var. + DbgVariable *getExistingAbstractVariable(InlinedVariable IV, + const DILocalVariable *&Cleansed); + DbgVariable *getExistingAbstractVariable(InlinedVariable IV); + void createAbstractVariable(const DILocalVariable *DV, LexicalScope *Scope); + void ensureAbstractVariableIsCreated(InlinedVariable Var, + const MDNode *Scope); + void ensureAbstractVariableIsCreatedIfScoped(InlinedVariable Var, + const MDNode *Scope); + + DbgVariable *createConcreteVariable(LexicalScope &Scope, InlinedVariable IV); + + /// Construct a DIE for this abstract scope. + void constructAbstractSubprogramScopeDIE(LexicalScope *Scope); + + /// Collect info for variables that were optimized out. + void collectDeadVariables(); + + void finishVariableDefinitions(); + + void finishSubprogramDefinitions(); + + /// Finish off debug information after all functions have been + /// processed. + void finalizeModuleInfo(); + + /// Emit the debug info section. + void emitDebugInfo(); + + /// Emit the abbreviation section. 
+  void emitAbbreviations();
+
+  /// Emit a specified accelerator table.
+  void emitAccel(DwarfAccelTable &Accel, MCSection *Section,
+                 StringRef TableName);
+
+  /// Emit visible names into a hashed accelerator table section.
+  void emitAccelNames();
+
+  /// Emit objective C classes and categories into a hashed
+  /// accelerator table section.
+  void emitAccelObjC();
+
+  /// Emit namespace dies into a hashed accelerator table.
+  void emitAccelNamespaces();
+
+  /// Emit type dies into a hashed accelerator table.
+  void emitAccelTypes();
+
+  /// Emit visible names into a debug pubnames section.
+  /// \param GnuStyle determines whether or not we want to emit
+  /// additional information into the table a la newer gcc for gdb
+  /// index.
+  void emitDebugPubNames(bool GnuStyle = false);
+
+  /// Emit visible types into a debug pubtypes section.
+  /// \param GnuStyle determines whether or not we want to emit
+  /// additional information into the table a la newer gcc for gdb
+  /// index.
+  void emitDebugPubTypes(bool GnuStyle = false);
+
+  void emitDebugPubSection(
+      bool GnuStyle, MCSection *PSec, StringRef Name,
+      const StringMap<const DIE *> &(DwarfCompileUnit::*Accessor)() const);
+
+  /// Emit visible names into a debug str section.
+  void emitDebugStr();
+
+  /// Emit visible names into a debug loc section.
+  void emitDebugLoc();
+
+  /// Emit visible names into a debug loc dwo section.
+  void emitDebugLocDWO();
+
+  /// Emit visible names into a debug aranges section.
+  void emitDebugARanges();
+
+  /// Emit visible names into a debug ranges section.
+  void emitDebugRanges();
+
+  /// DWARF 5 Experimental Split Dwarf Emitters
+
+  /// Initialize common features of skeleton units.
+  void initSkeletonUnit(const DwarfUnit &U, DIE &Die,
+                        std::unique_ptr<DwarfUnit> NewU);
+
+  /// Construct the split debug info compile unit for the debug info
+  /// section.
+  DwarfCompileUnit &constructSkeletonCU(const DwarfCompileUnit &CU);
+
+  /// Emit the debug info dwo section.
+  void emitDebugInfoDWO();
+
+  /// Emit the debug abbrev dwo section.
+  void emitDebugAbbrevDWO();
+
+  /// Emit the debug line dwo section.
+  void emitDebugLineDWO();
+
+  /// Emit the debug str dwo section.
+  void emitDebugStrDWO();
+
+  /// Flags to let the linker know we have emitted new style pubnames. Only
+  /// emit it here if we don't have a skeleton CU for split dwarf.
+  void addGnuPubAttributes(DwarfUnit &U, DIE &D) const;
+
+  /// Create new DwarfCompileUnit for the given metadata node with tag
+  /// DW_TAG_compile_unit.
+  DwarfCompileUnit &constructDwarfCompileUnit(const DICompileUnit *DIUnit);
+
+  /// Construct imported_module or imported_declaration DIE.
+  void constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
+                                        const DIImportedEntity *N);
+
+  /// Register a source line with debug info. Returns the unique
+  /// label that was emitted and which provides correspondence to the
+  /// source line list.
+  void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope,
+                        unsigned Flags);
+
+  /// Identify instructions that mark the beginning or end of a scope.
+  void identifyScopeMarkers();
+
+  /// Populate LexicalScope entries with variables' info.
+  void collectVariableInfo(DwarfCompileUnit &TheCU, const DISubprogram *SP,
+                           DenseSet<InlinedVariable> &ProcessedVars);
+
+  /// Build the location list for all DBG_VALUEs in the
+  /// function that describe the same variable.
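+  /// Entries with identical values over adjacent ranges are coalesced where
+  /// possible.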
+ void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc, + const DbgValueHistoryMap::InstrRanges &Ranges); + + /// Collect variable information from the side table maintained + /// by MMI. + void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P); + + /// Ensure that a label will be emitted before MI. + void requestLabelBeforeInsn(const MachineInstr *MI) { + LabelsBeforeInsn.insert(std::make_pair(MI, nullptr)); + } + + /// Ensure that a label will be emitted after MI. + void requestLabelAfterInsn(const MachineInstr *MI) { + LabelsAfterInsn.insert(std::make_pair(MI, nullptr)); + } + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfDebug(AsmPrinter *A, Module *M); + + ~DwarfDebug() override; + + /// Emit all Dwarf sections that should come prior to the + /// content. + void beginModule(); + + /// Emit all Dwarf sections that should come after the content. + void endModule() override; + + /// Gather pre-function debug information. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function debug information. + void endFunction(const MachineFunction *MF) override; + + /// Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; + + /// Process end of an instruction. + void endInstruction() override; + + /// Perform an MD5 checksum of \p Identifier and return the lower 64 bits. + static uint64_t makeTypeSignature(StringRef Identifier); + + /// Add a DIE to the set of types that we're going to pull into + /// type units. + void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier, + DIE &Die, const DICompositeType *CTy); + + /// Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override { + SymSize[Sym] = Size; + } + + /// Returns whether to emit DW_AT_[MIPS_]linkage_name. + bool useLinkageNames() const { return UseLinkageNames; } + + /// Returns whether to use DW_OP_GNU_push_tls_address, instead of the + /// standard DW_OP_form_tls_address opcode + bool useGNUTLSOpcode() const { return UseGNUTLSOpcode; } + + /// \defgroup DebuggerTuning Predicates to tune DWARF for a given debugger. + /// + /// Returns whether we are "tuning" for a given debugger. + /// @{ + bool tuneForGDB() const { return DebuggerTuning == DebuggerKind::GDB; } + bool tuneForLLDB() const { return DebuggerTuning == DebuggerKind::LLDB; } + bool tuneForSCE() const { return DebuggerTuning == DebuggerKind::SCE; } + /// @} + + // Experimental DWARF5 features. + + /// Returns whether or not to emit tables that dwarf consumers can + /// use to accelerate lookup. + bool useDwarfAccelTables() const { return HasDwarfAccelTables; } + + /// Returns whether or not to change the current debug info for the + /// split dwarf proposal support. + bool useSplitDwarf() const { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } + + /// Returns the previous CU that was being updated + const DwarfCompileUnit *getPrevCU() const { return PrevCU; } + void setPrevCU(const DwarfCompileUnit *PrevCU) { this->PrevCU = PrevCU; } + + /// Returns the entries for the .debug_loc section. 
+ const DebugLocStream &getDebugLocs() const { return DebugLocs; } + + /// Emit an entry for the debug loc section. This can be used to + /// handle an entry that's going to be emitted into the debug loc section. + void emitDebugLocEntry(ByteStreamer &Streamer, + const DebugLocStream::Entry &Entry); + + /// Emit the location for a debug loc entry, including the size header. + void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry); + + /// Find the MDNode for the given reference. + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { + return Ref.resolve(TypeIdentifierMap); + } + + /// Return the TypeIdentifierMap. + const DITypeIdentifierMap &getTypeIdentifierMap() const { + return TypeIdentifierMap; + } + + /// Find the DwarfCompileUnit for the given CU Die. + DwarfCompileUnit *lookupUnit(const DIE *CU) const { + return CUDieMap.lookup(CU); + } + + void addSubprogramNames(const DISubprogram *SP, DIE &Die); + + AddressPool &getAddressPool() { return AddrPool; } + + void addAccelName(StringRef Name, const DIE &Die); + + void addAccelObjC(StringRef Name, const DIE &Die); + + void addAccelNamespace(StringRef Name, const DIE &Die); + + void addAccelType(StringRef Name, const DIE &Die, char Flags); + + const MachineFunction *getCurrentFunction() const { return CurFn; } + + /// A helper function to check whether the DIE for a given Scope is + /// going to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); + + /// Return Label preceding the instruction. + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// Return Label immediately following the instruction. + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + + // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit. + + SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() { + return ProcessedSPNodes; + } +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h new file mode 100644 index 0000000..f4667b4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -0,0 +1,87 @@ +//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H + +#include "EHStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" + +namespace llvm { +class MachineFunction; +class ARMTargetStreamer; + +class LLVM_LIBRARY_VISIBILITY DwarfCFIExceptionBase : public EHStreamer { +protected: + DwarfCFIExceptionBase(AsmPrinter *A); + + /// Per-function flag to indicate if frame CFI info should be emitted. + bool shouldEmitCFI; + + void markFunctionEnd() override; +}; + +class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase { + /// Per-function flag to indicate if .cfi_personality should be emitted. + bool shouldEmitPersonality; + + /// Per-function flag to indicate if .cfi_lsda should be emitted. + bool shouldEmitLSDA; + + /// Per-function flag to indicate if frame moves info should be emitted. 
+ bool shouldEmitMoves; + + AsmPrinter::CFIMoveType moveTypeModule; + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + DwarfCFIException(AsmPrinter *A); + ~DwarfCFIException() override; + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; +}; + +class LLVM_LIBRARY_VISIBILITY ARMException : public DwarfCFIExceptionBase { + void emitTypeInfos(unsigned TTypeEncoding) override; + ARMTargetStreamer &getTargetStreamer(); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + ~ARMException() override; + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp new file mode 100644 index 0000000..7b5b831 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -0,0 +1,274 @@ +//===-- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf debug info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfExpression.h" +#include "DwarfDebug.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +void DwarfExpression::AddReg(int DwarfReg, const char *Comment) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment); + } else { + EmitOp(dwarf::DW_OP_regx, Comment); + EmitUnsigned(DwarfReg); + } +} + +void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) { + assert(DwarfReg >= 0 && "invalid negative dwarf register number"); + if (DwarfReg < 32) { + EmitOp(dwarf::DW_OP_breg0 + DwarfReg); + } else { + EmitOp(dwarf::DW_OP_bregx); + EmitUnsigned(DwarfReg); + } + EmitSigned(Offset); + if (Deref) + EmitOp(dwarf::DW_OP_deref); +} + +void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) { + assert(SizeInBits > 0 && "piece has size zero"); + const unsigned SizeOfByte = 8; + if (OffsetInBits > 0 || SizeInBits % SizeOfByte) { + EmitOp(dwarf::DW_OP_bit_piece); + EmitUnsigned(SizeInBits); + EmitUnsigned(OffsetInBits); + } else { + EmitOp(dwarf::DW_OP_piece); + unsigned ByteSize = SizeInBits / SizeOfByte; + EmitUnsigned(ByteSize); + } +} + +void DwarfExpression::AddShr(unsigned ShiftBy) { + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(ShiftBy); + EmitOp(dwarf::DW_OP_shr); +} + +bool DwarfExpression::AddMachineRegIndirect(unsigned MachineReg, int Offset) { + if (isFrameRegister(MachineReg)) { + // If variable offset is based in frame register then use fbreg. + EmitOp(dwarf::DW_OP_fbreg); + EmitSigned(Offset); + return true; + } + + int DwarfReg = TRI.getDwarfRegNum(MachineReg, false); + if (DwarfReg < 0) + return false; + + AddRegIndirect(DwarfReg, Offset); + return true; +} + +bool DwarfExpression::AddMachineRegPiece(unsigned MachineReg, + unsigned PieceSizeInBits, + unsigned PieceOffsetInBits) { + if (!TRI.isPhysicalRegister(MachineReg)) + return false; + + int Reg = TRI.getDwarfRegNum(MachineReg, false); + + // If this is a valid register number, emit it. + if (Reg >= 0) { + AddReg(Reg); + if (PieceSizeInBits) + AddOpPiece(PieceSizeInBits, PieceOffsetInBits); + return true; + } + + // Walk up the super-register chain until we find a valid number. + // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0. + for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + Reg = TRI.getDwarfRegNum(*SR, false); + if (Reg >= 0) { + unsigned Idx = TRI.getSubRegIndex(*SR, MachineReg); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned RegOffset = TRI.getSubRegIdxOffset(Idx); + AddReg(Reg, "super-register"); + if (PieceOffsetInBits == RegOffset) { + AddOpPiece(Size, RegOffset); + } else { + // If this is part of a variable in a sub-register at a + // non-zero offset, we need to manually shift the value into + // place, since the DW_OP_piece describes the part of the + // variable, not the position of the subregister. + if (RegOffset) + AddShr(RegOffset); + AddOpPiece(Size, PieceOffsetInBits); + } + return true; + } + } + + // Otherwise, attempt to find a covering set of sub-register numbers. + // For example, Q0 on ARM is a composition of D0+D1. + // + // Keep track of the current position so we can emit the more + // efficient DW_OP_piece. 
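+  // For instance, if only the D registers have DWARF numbers, Q0 would be
+  // described roughly as DW_OP_regx(D0) DW_OP_piece(8) DW_OP_regx(D1)
+  // DW_OP_piece(8); the exact register numbers are target-defined.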
+ unsigned CurPos = PieceOffsetInBits; + // The size of the register in bits, assuming 8 bits per byte. + unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8; + // Keep track of the bits in the register we already emitted, so we + // can avoid emitting redundant aliasing subregs. + SmallBitVector Coverage(RegSize, false); + for (MCSubRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) { + unsigned Idx = TRI.getSubRegIndex(MachineReg, *SR); + unsigned Size = TRI.getSubRegIdxSize(Idx); + unsigned Offset = TRI.getSubRegIdxOffset(Idx); + Reg = TRI.getDwarfRegNum(*SR, false); + + // Intersection between the bits we already emitted and the bits + // covered by this subregister. + SmallBitVector Intersection(RegSize, false); + Intersection.set(Offset, Offset + Size); + Intersection ^= Coverage; + + // If this sub-register has a DWARF number and we haven't covered + // its range, emit a DWARF piece for it. + if (Reg >= 0 && Intersection.any()) { + AddReg(Reg, "sub-register"); + AddOpPiece(Size, Offset == CurPos ? 0 : Offset); + CurPos = Offset + Size; + + // Mark it as emitted. + Coverage.set(Offset, Offset + Size); + } + } + + return CurPos > PieceOffsetInBits; +} + +void DwarfExpression::AddSignedConstant(int Value) { + EmitOp(dwarf::DW_OP_consts); + EmitSigned(Value); + // The proper way to describe a constant value is + // DW_OP_constu <const>, DW_OP_stack_value. + // Unfortunately, DW_OP_stack_value was not available until DWARF-4, + // so we will continue to generate DW_OP_constu <const> for DWARF-2 + // and DWARF-3. Technically, this is incorrect since DW_OP_const <const> + // actually describes a value at a constant addess, not a constant value. + // However, in the past there was no better way to describe a constant + // value, so the producers and consumers started to rely on heuristics + // to disambiguate the value vs. location status of the expression. + // See PR21176 for more details. + if (DwarfVersion >= 4) + EmitOp(dwarf::DW_OP_stack_value); +} + +void DwarfExpression::AddUnsignedConstant(unsigned Value) { + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(Value); + // cf. comment in DwarfExpression::AddSignedConstant(). + if (DwarfVersion >= 4) + EmitOp(dwarf::DW_OP_stack_value); +} + +static unsigned getOffsetOrZero(unsigned OffsetInBits, + unsigned PieceOffsetInBits) { + if (OffsetInBits == PieceOffsetInBits) + return 0; + assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces"); + return OffsetInBits; +} + +bool DwarfExpression::AddMachineRegExpression(const DIExpression *Expr, + unsigned MachineReg, + unsigned PieceOffsetInBits) { + auto I = Expr->expr_op_begin(); + auto E = Expr->expr_op_end(); + if (I == E) + return AddMachineRegPiece(MachineReg); + + // Pattern-match combinations for which more efficient representations exist + // first. + bool ValidReg = false; + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); + // Piece always comes at the end of the expression. + return AddMachineRegPiece(MachineReg, SizeInBits, + getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + } + case dwarf::DW_OP_plus: + case dwarf::DW_OP_minus: { + // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset]. + // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset]. + auto N = I.getNext(); + if (N != E && N->getOp() == dwarf::DW_OP_deref) { + unsigned Offset = I->getArg(0); + ValidReg = AddMachineRegIndirect( + MachineReg, I->getOp() == dwarf::DW_OP_plus ? 
Offset : -Offset); + std::advance(I, 2); + break; + } else + ValidReg = AddMachineRegPiece(MachineReg); + } + case dwarf::DW_OP_deref: { + // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg]. + ValidReg = AddMachineRegIndirect(MachineReg); + ++I; + break; + } + default: + llvm_unreachable("unsupported operand"); + } + + if (!ValidReg) + return false; + + // Emit remaining elements of the expression. + AddExpression(I, E, PieceOffsetInBits); + return true; +} + +void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I, + DIExpression::expr_op_iterator E, + unsigned PieceOffsetInBits) { + for (; I != E; ++I) { + switch (I->getOp()) { + case dwarf::DW_OP_bit_piece: { + unsigned OffsetInBits = I->getArg(0); + unsigned SizeInBits = I->getArg(1); + AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits)); + break; + } + case dwarf::DW_OP_plus: + EmitOp(dwarf::DW_OP_plus_uconst); + EmitUnsigned(I->getArg(0)); + break; + case dwarf::DW_OP_minus: + // There is no OP_minus_uconst. + EmitOp(dwarf::DW_OP_constu); + EmitUnsigned(I->getArg(0)); + EmitOp(dwarf::DW_OP_minus); + break; + case dwarf::DW_OP_deref: + EmitOp(dwarf::DW_OP_deref); + break; + default: + llvm_unreachable("unhandled opcode found in expression"); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h new file mode 100644 index 0000000..78ec937 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -0,0 +1,136 @@ +//===-- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H + +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class AsmPrinter; +class ByteStreamer; +class TargetRegisterInfo; +class DwarfUnit; +class DIELoc; + +/// Base class containing the logic for constructing DWARF expressions +/// independently of whether they are emitted into a DIE or into a .debug_loc +/// entry. +class DwarfExpression { +protected: + // Various convenience accessors that extract things out of AsmPrinter. + const TargetRegisterInfo &TRI; + unsigned DwarfVersion; + +public: + DwarfExpression(const TargetRegisterInfo &TRI, + unsigned DwarfVersion) + : TRI(TRI), DwarfVersion(DwarfVersion) {} + virtual ~DwarfExpression() {} + + /// Output a dwarf operand and an optional assembler comment. + virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0; + /// Emit a raw signed value. + virtual void EmitSigned(int64_t Value) = 0; + /// Emit a raw unsigned value. + virtual void EmitUnsigned(uint64_t Value) = 0; + /// Return whether the given machine register is the frame register in the + /// current function. + virtual bool isFrameRegister(unsigned MachineReg) = 0; + + /// Emit a dwarf register operation. + void AddReg(int DwarfReg, const char *Comment = nullptr); + /// Emit an (double-)indirect dwarf register operation. 
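+  /// This emits DW_OP_breg<n>/DW_OP_bregx with a signed offset, followed by
+  /// DW_OP_deref when \p Deref is set.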
+  void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
+
+  /// Emit a dwarf register operation for describing
+  /// - a small value occupying only part of a register or
+  /// - a register representing only part of a value.
+  void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+  /// Emit a shift-right dwarf expression.
+  void AddShr(unsigned ShiftBy);
+
+  /// Emit an indirect dwarf register operation for the given machine register.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegIndirect(unsigned MachineReg, int Offset = 0);
+
+  /// \brief Emit a partial DWARF register operation.
+  /// \param MachineReg        the register,
+  /// \param PieceSizeInBits   size of the piece in bits, and
+  /// \param PieceOffsetInBits offset of the piece in bits, if this is one
+  ///                          piece of an aggregate value.
+  ///
+  /// If size and offset are zero, an operation for the entire register is
+  /// emitted. Some targets do not provide a DWARF register number for every
+  /// register; if this is the case, this function will attempt to emit a
+  /// DWARF register by emitting a piece of a super-register or by piecing
+  /// together multiple subregisters that alias the register.
+  ///
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegPiece(unsigned MachineReg, unsigned PieceSizeInBits = 0,
+                          unsigned PieceOffsetInBits = 0);
+
+  /// Emit a signed constant.
+  void AddSignedConstant(int Value);
+  /// Emit an unsigned constant.
+  void AddUnsignedConstant(unsigned Value);
+
+  /// \brief Emit an entire expression on top of a machine register location.
+  ///
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  /// \return false if no DWARF register exists for MachineReg.
+  bool AddMachineRegExpression(const DIExpression *Expr, unsigned MachineReg,
+                               unsigned PieceOffsetInBits = 0);
+  /// Emit the remaining operations of a DIExpression, from iterator \p I up
+  /// to \p E.
+  /// \param PieceOffsetInBits If this is one piece out of a fragmented
+  /// location, this is the offset of the piece inside the entire variable.
+  void AddExpression(DIExpression::expr_op_iterator I,
+                     DIExpression::expr_op_iterator E,
+                     unsigned PieceOffsetInBits = 0);
+};
+
+/// DwarfExpression implementation for .debug_loc entries.
+class DebugLocDwarfExpression : public DwarfExpression {
+  ByteStreamer &BS;
+
+public:
+  DebugLocDwarfExpression(const TargetRegisterInfo &TRI,
+                          unsigned DwarfVersion, ByteStreamer &BS)
+      : DwarfExpression(TRI, DwarfVersion), BS(BS) {}
+
+  void EmitOp(uint8_t Op, const char *Comment = nullptr) override;
+  void EmitSigned(int64_t Value) override;
+  void EmitUnsigned(uint64_t Value) override;
+  bool isFrameRegister(unsigned MachineReg) override;
+};
+
+/// DwarfExpression implementation for singular DW_AT_location.
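+/// The operators are accumulated into a DIELoc owned by a DwarfUnit instead
+/// of being streamed out directly.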
+class DIEDwarfExpression : public DwarfExpression { +const AsmPrinter &AP; + DwarfUnit &DU; + DIELoc &DIE; + +public: + DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE); + void EmitOp(uint8_t Op, const char *Comment = nullptr) override; + void EmitSigned(int64_t Value) override; + void EmitUnsigned(uint64_t Value) override; + bool isFrameRegister(unsigned MachineReg) override; +}; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp new file mode 100644 index 0000000..51b27b4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp @@ -0,0 +1,173 @@ +//===-- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DwarfFile.h" +#include "DwarfDebug.h" +#include "DwarfUnit.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { +DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA) + : Asm(AP), StrPool(DA, *Asm, Pref) {} + +DwarfFile::~DwarfFile() { + for (DIEAbbrev *Abbrev : Abbreviations) + Abbrev->~DIEAbbrev(); +} + +// Define a unique number for the abbreviation. +// +DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) { + FoldingSetNodeID ID; + DIEAbbrev Abbrev = Die.generateAbbrev(); + Abbrev.Profile(ID); + + void *InsertPos; + if (DIEAbbrev *Existing = + AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) { + Die.setAbbrevNumber(Existing->getNumber()); + return *Existing; + } + + // Move the abbreviation to the heap and assign a number. + DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev)); + Abbreviations.push_back(New); + New->setNumber(Abbreviations.size()); + Die.setAbbrevNumber(Abbreviations.size()); + + // Store it for lookup. + AbbreviationsSet.InsertNode(New, InsertPos); + return *New; +} + +void DwarfFile::addUnit(std::unique_ptr<DwarfUnit> U) { + CUs.push_back(std::move(U)); +} + +// Emit the various dwarf units to the unit section USection with +// the abbreviations going into ASection. +void DwarfFile::emitUnits(bool UseOffsets) { + for (const auto &TheU : CUs) { + DIE &Die = TheU->getUnitDie(); + MCSection *USection = TheU->getSection(); + Asm->OutStreamer->SwitchSection(USection); + + TheU->emitHeader(UseOffsets); + + Asm->emitDwarfDIE(Die); + } +} + +// Compute the size and offset for each DIE. +void DwarfFile::computeSizeAndOffsets() { + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. + for (const auto &TheU : CUs) { + TheU->setDebugInfoOffset(SecOffset); + + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + TheU->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE. + unsigned EndOffset = computeSizeAndOffset(TheU->getUnitDie(), Offset); + SecOffset += EndOffset; + } +} +// Compute the size and offset of a DIE. The offset is relative to start of the +// CU. 
It returns the offset after laying out the DIE. +unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) { + // Record the abbreviation. + const DIEAbbrev &Abbrev = assignAbbrevNumber(Die); + + // Set DIE offset + Die.setOffset(Offset); + + // Start the size with the size of abbreviation code. + Offset += getULEB128Size(Die.getAbbrevNumber()); + + // Size the DIE attribute values. + for (const auto &V : Die.values()) + // Size attribute value. + Offset += V.SizeOf(Asm); + + // Size the DIE children if any. + if (Die.hasChildren()) { + (void)Abbrev; + assert(Abbrev.hasChildren() && "Children flag not set"); + + for (auto &Child : Die.children()) + Offset = computeSizeAndOffset(Child, Offset); + + // End of children marker. + Offset += sizeof(int8_t); + } + + Die.setSize(Offset - Die.getOffset()); + return Offset; +} + +void DwarfFile::emitAbbrevs(MCSection *Section) { + // Check to see if it is worth the effort. + if (!Abbreviations.empty()) { + // Start the debug abbrev section. + Asm->OutStreamer->SwitchSection(Section); + Asm->emitDwarfAbbrevs(Abbreviations); + } +} + +// Emit strings into a string section. +void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) { + StrPool.emit(*Asm, StrSection, OffsetSection); +} + +bool DwarfFile::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { + SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; + const DILocalVariable *DV = Var->getVariable(); + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV->getArg()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. + auto I = Vars.begin(); + while (I != Vars.end()) { + unsigned CurNum = (*I)->getVariable()->getArg(); + // A local (non-parameter) variable has been found, insert immediately + // before it. + if (CurNum == 0) + break; + // A later indexed parameter has been found, insert immediately before it. + if (CurNum > ArgNum) + break; + if (CurNum == ArgNum) { + (*I)->addMMIEntry(*Var); + return false; + } + ++I; + } + Vars.insert(I, Var); + return true; + } + + Vars.push_back(Var); + return true; +} +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h new file mode 100644 index 0000000..8402027 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h @@ -0,0 +1,118 @@ +//===-- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework -------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFFILE_H + +#include "AddressPool.h" +#include "DwarfStringPool.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/Support/Allocator.h" +#include <memory> +#include <string> +#include <vector> + +namespace llvm { +class AsmPrinter; +class DbgVariable; +class DwarfUnit; +class DIEAbbrev; +class MCSymbol; +class DIE; +class LexicalScope; +class StringRef; +class DwarfDebug; +class MCSection; +class MDNode; +class DwarfFile { + // Target of Dwarf emission, used for sizing of abbreviations. + AsmPrinter *Asm; + + BumpPtrAllocator AbbrevAllocator; + + // Used to uniquely define abbreviations. + FoldingSet<DIEAbbrev> AbbreviationsSet; + + // A list of all the unique abbreviations in use. + std::vector<DIEAbbrev *> Abbreviations; + + // A pointer to all units in the section. + SmallVector<std::unique_ptr<DwarfUnit>, 1> CUs; + + DwarfStringPool StrPool; + + // Collection of dbg variables of a scope. + DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> ScopeVariables; + + // Collection of abstract subprogram DIEs. + DenseMap<const MDNode *, DIE *> AbstractSPDies; + + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in DwarfCompileUnit. + DenseMap<const MDNode *, DIE *> DITypeNodeToDieMap; + +public: + DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA); + + ~DwarfFile(); + + const SmallVectorImpl<std::unique_ptr<DwarfUnit>> &getUnits() { return CUs; } + + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE &Die, unsigned Offset); + + /// \brief Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// Define a unique number for the abbreviation. + /// + /// Compute the abbreviation for \c Die, look up its unique number, and + /// return a reference to it in the uniquing table. + DIEAbbrev &assignAbbrevNumber(DIE &Die); + + /// \brief Add a unit to the list of CUs. + void addUnit(std::unique_ptr<DwarfUnit> U); + + /// \brief Emit all of the units to the section listed with the given + /// abbreviation section. + void emitUnits(bool UseOffsets); + + /// \brief Emit a set of abbreviations to the specific section. + void emitAbbrevs(MCSection *); + + /// \brief Emit all of the strings to the section given. + void emitStrings(MCSection *StrSection, MCSection *OffsetSection = nullptr); + + /// \brief Returns the string pool. + DwarfStringPool &getStringPool() { return StrPool; } + + /// \returns false if the variable was merged with a previous one. 
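+  /// Merging happens when an MMI entry for the same argument number already
+  /// exists and \p Var is folded into it via DbgVariable::addMMIEntry.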
+ bool addScopeVariable(LexicalScope *LS, DbgVariable *Var); + + DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8>> &getScopeVariables() { + return ScopeVariables; + } + + DenseMap<const MDNode *, DIE *> &getAbstractSPDies() { + return AbstractSPDies; + } + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + DITypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return DITypeNodeToDieMap.lookup(TypeMD); + } +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp new file mode 100644 index 0000000..2066f74 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp @@ -0,0 +1,74 @@ +//===-- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DwarfStringPool.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +DwarfStringPool::DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, + StringRef Prefix) + : Pool(A), Prefix(Prefix), + ShouldCreateSymbols(Asm.MAI->doesDwarfUseRelocationsAcrossSections()) {} + +DwarfStringPool::EntryRef DwarfStringPool::getEntry(AsmPrinter &Asm, + StringRef Str) { + auto I = Pool.insert(std::make_pair(Str, EntryTy())); + if (I.second) { + auto &Entry = I.first->second; + Entry.Index = Pool.size() - 1; + Entry.Offset = NumBytes; + Entry.Symbol = ShouldCreateSymbols ? Asm.createTempSymbol(Prefix) : nullptr; + + NumBytes += Str.size() + 1; + assert(NumBytes > Entry.Offset && "Unexpected overflow"); + } + return EntryRef(*I.first); +} + +void DwarfStringPool::emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection) { + if (Pool.empty()) + return; + + // Start the dwarf str section. + Asm.OutStreamer->SwitchSection(StrSection); + + // Get all of the string pool entries and put them in an array by their ID so + // we can sort them. + SmallVector<const StringMapEntry<EntryTy> *, 64> Entries(Pool.size()); + + for (const auto &E : Pool) + Entries[E.getValue().Index] = &E; + + for (const auto &Entry : Entries) { + assert(ShouldCreateSymbols == static_cast<bool>(Entry->getValue().Symbol) && + "Mismatch between setting and entry"); + + // Emit a label for reference from debug information entries. + if (ShouldCreateSymbols) + Asm.OutStreamer->EmitLabel(Entry->getValue().Symbol); + + // Emit the string itself with a terminating null byte. + Asm.OutStreamer->AddComment("string offset=" + + Twine(Entry->getValue().Offset)); + Asm.OutStreamer->EmitBytes( + StringRef(Entry->getKeyData(), Entry->getKeyLength() + 1)); + } + + // If we've got an offset section go ahead and emit that now as well. + if (OffsetSection) { + Asm.OutStreamer->SwitchSection(OffsetSection); + unsigned size = 4; // FIXME: DWARF64 is 8. 
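+    // One 4-byte offset per string, in pool-index order, so an entry's index
+    // can be mapped back to its offset in the string section.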
+ for (const auto &Entry : Entries) + Asm.OutStreamer->EmitIntValue(Entry->getValue().Offset, size); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h new file mode 100644 index 0000000..93a1684 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfStringPool.h @@ -0,0 +1,49 @@ +//===-- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework -*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFSTRINGPOOL_H + +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/DwarfStringPoolEntry.h" +#include "llvm/Support/Allocator.h" +#include <utility> + +namespace llvm { + +class AsmPrinter; +class MCSymbol; +class MCSection; +class StringRef; + +// Collection of strings for this unit and assorted symbols. +// A String->Symbol mapping of strings used by indirect +// references. +class DwarfStringPool { + typedef DwarfStringPoolEntry EntryTy; + StringMap<EntryTy, BumpPtrAllocator &> Pool; + StringRef Prefix; + unsigned NumBytes = 0; + bool ShouldCreateSymbols; + +public: + typedef DwarfStringPoolEntryRef EntryRef; + + DwarfStringPool(BumpPtrAllocator &A, AsmPrinter &Asm, StringRef Prefix); + + void emit(AsmPrinter &Asm, MCSection *StrSection, + MCSection *OffsetSection = nullptr); + + bool empty() const { return Pool.empty(); } + + /// Get a reference to an entry in the string pool. + EntryRef getEntry(AsmPrinter &Asm, StringRef Str); +}; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp new file mode 100644 index 0000000..d75fea5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -0,0 +1,1553 @@ +//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for constructing a dwarf compile unit. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfUnit.h" +#include "DwarfAccelTable.h" +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "DwarfExpression.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "dwarfdebug" + +static cl::opt<bool> +GenerateDwarfTypeUnits("generate-type-units", cl::Hidden, + cl::desc("Generate DWARF4 type units."), + cl::init(false)); + +DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, + DIELoc &DIE) + : DwarfExpression(*AP.MF->getSubtarget().getRegisterInfo(), + AP.getDwarfDebug()->getDwarfVersion()), + AP(AP), DU(DU), DIE(DIE) {} + +void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) { + DU.addUInt(DIE, dwarf::DW_FORM_data1, Op); +} +void DIEDwarfExpression::EmitSigned(int64_t Value) { + DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value); +} +void DIEDwarfExpression::EmitUnsigned(uint64_t Value) { + DU.addUInt(DIE, dwarf::DW_FORM_udata, Value); +} +bool DIEDwarfExpression::isFrameRegister(unsigned MachineReg) { + return MachineReg == TRI.getFrameRegister(*AP.MF); +} + +DwarfUnit::DwarfUnit(unsigned UID, dwarf::Tag UnitTag, + const DICompileUnit *Node, AsmPrinter *A, DwarfDebug *DW, + DwarfFile *DWU) + : UniqueID(UID), CUNode(Node), + UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), DebugInfoOffset(0), + Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) { + assert(UnitTag == dwarf::DW_TAG_compile_unit || + UnitTag == dwarf::DW_TAG_type_unit); +} + +DwarfTypeUnit::DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable) + : DwarfUnit(UID, dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), + CU(CU), SplitLineTable(SplitLineTable) { + if (SplitLineTable) + addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0); +} + +DwarfUnit::~DwarfUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); + for (unsigned j = 0, M = DIELocs.size(); j < M; ++j) + DIELocs[j]->~DIELoc(); +} + +int64_t DwarfUnit::getDefaultLowerBound() const { + switch (getLanguage()) { + default: + break; + + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_ObjC: + case dwarf::DW_LANG_ObjC_plus_plus: + return 0; + + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran95: + return 1; + + // The languages below have valid values only if the DWARF version >= 4. 
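+  // For earlier standards no default is defined for them, so these cases break
+  // out of the switch and fall back to returning -1 below.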
+ case dwarf::DW_LANG_Java: + case dwarf::DW_LANG_Python: + case dwarf::DW_LANG_UPC: + case dwarf::DW_LANG_D: + if (dwarf::DWARF_VERSION >= 4) + return 0; + break; + + case dwarf::DW_LANG_Ada83: + case dwarf::DW_LANG_Ada95: + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LANG_Modula2: + case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LANG_PLI: + if (dwarf::DWARF_VERSION >= 4) + return 1; + break; + + // The languages below have valid values only if the DWARF version >= 5. + case dwarf::DW_LANG_OpenCL: + case dwarf::DW_LANG_Go: + case dwarf::DW_LANG_Haskell: + case dwarf::DW_LANG_C_plus_plus_03: + case dwarf::DW_LANG_C_plus_plus_11: + case dwarf::DW_LANG_OCaml: + case dwarf::DW_LANG_Rust: + case dwarf::DW_LANG_C11: + case dwarf::DW_LANG_Swift: + case dwarf::DW_LANG_Dylan: + case dwarf::DW_LANG_C_plus_plus_14: + if (dwarf::DWARF_VERSION >= 5) + return 0; + break; + + case dwarf::DW_LANG_Modula3: + case dwarf::DW_LANG_Julia: + case dwarf::DW_LANG_Fortran03: + case dwarf::DW_LANG_Fortran08: + if (dwarf::DWARF_VERSION >= 5) + return 1; + break; + } + + return -1; +} + +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(const DINode *D) { + // When the MDNode can be part of the type system, the DIE can be shared + // across CUs. + // Combining type units and cross-CU DIE sharing is lower value (since + // cross-CU DIE sharing is used in LTO and removes type redundancy at that + // level already) but may be implementable for some value in projects + // building multiple independent libraries with LTO and then linking those + // together. + return (isa<DIType>(D) || + (isa<DISubprogram>(D) && !cast<DISubprogram>(D)->isDefinition())) && + !GenerateDwarfTypeUnits; +} + +DIE *DwarfUnit::getDIE(const DINode *D) const { + if (isShareableAcrossCUs(D)) + return DU->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DU->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + +void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present, + DIEInteger(1)); + else + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag, + DIEInteger(1)); +} + +void DwarfUnit::addUInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); +} + +void DwarfUnit::addUInt(DIEValueList &Block, dwarf::Form Form, + uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); +} + +void DwarfUnit::addSInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); + Die.addValue(DIEValueAllocator, Attribute, *Form, DIEInteger(Integer)); +} + +void DwarfUnit::addSInt(DIELoc &Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); +} + +void DwarfUnit::addString(DIE &Die, dwarf::Attribute Attribute, + StringRef String) { + Die.addValue(DIEValueAllocator, Attribute, + isDwoUnit() ? 
dwarf::DW_FORM_GNU_str_index : dwarf::DW_FORM_strp, + DIEString(DU->getStringPool().getEntry(*Asm, String))); +} + +DIEValueList::value_iterator DwarfUnit::addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label) { + return Die.addValue(DIEValueAllocator, Attribute, Form, DIELabel(Label)); +} + +void DwarfUnit::addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + +unsigned DwarfTypeUnit::getOrCreateSourceID(StringRef FileName, StringRef DirName) { + return SplitLineTable ? SplitLineTable->getFile(DirName, FileName) + : getCU().getOrCreateSourceID(FileName, DirName); +} + +void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) { + if (!DD->useSplitDwarf()) { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); + } else { + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, + DD->getAddressPool().getIndex(Sym)); + } +} + +void DwarfUnit::addLabelDelta(DIE &Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_data4, + new (DIEValueAllocator) DIEDelta(Hi, Lo)); +} + +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry) { + addDIEEntry(Die, Attribute, DIEEntry(Entry)); +} + +void DwarfUnit::addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type) { + // Flag the type unit reference as a declaration so that if it contains + // members (implicit special members, static data member definitions, member + // declarations for definitions in this CU, etc) consumers don't get confused + // and think this is a full definition. + addFlag(Die, dwarf::DW_AT_declaration); + + Die.addValue(DIEValueAllocator, dwarf::DW_AT_signature, + dwarf::DW_FORM_ref_sig8, DIETypeSignature(Type)); +} + +void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, + StringRef Identifier) { + uint64_t Signature = DD->makeTypeSignature(Identifier); + Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_ref_sig8, + DIEInteger(Signature)); +} + +void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute, + DIEEntry Entry) { + const DIE *DieCU = Die.getUnitOrNull(); + const DIE *EntryCU = Entry.getEntry().getUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = &getUnitDie(); + if (!EntryCU) + EntryCU = &getUnitDie(); + Die.addValue(DIEValueAllocator, Attribute, + EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + Entry); +} + +DIE &DwarfUnit::createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N) { + DIE &Die = Parent.addChild(DIE::get(DIEValueAllocator, (dwarf::Tag)Tag)); + if (N) + insertDIE(N, &Die); + return Die; +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Loc) { + Loc->ComputeSize(Asm); + DIELocs.push_back(Loc); // Memoize so we can call the destructor later on. 
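+  // Note: for DWARF 4 and later DIELoc::BestForm selects DW_FORM_exprloc;
+  // earlier versions fall back to one of the DW_FORM_block* forms sized to
+  // the expression.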
+ Die.addValue(DIEValueAllocator, Attribute, + Loc->BestForm(DD->getDwarfVersion()), Loc); +} + +void DwarfUnit::addBlock(DIE &Die, dwarf::Attribute Attribute, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. + Die.addValue(DIEValueAllocator, Attribute, Block->BestForm(), Block); +} + +void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, StringRef File, + StringRef Directory) { + if (Line == 0) + return; + + unsigned FileID = getOrCreateSourceID(File, Directory); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DILocalVariable *V) { + assert(V); + + addSourceLine(Die, V->getLine(), V->getScope()->getFilename(), + V->getScope()->getDirectory()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIGlobalVariable *G) { + assert(G); + + addSourceLine(Die, G->getLine(), G->getFilename(), G->getDirectory()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DISubprogram *SP) { + assert(SP); + + addSourceLine(Die, SP->getLine(), SP->getFilename(), SP->getDirectory()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIType *Ty) { + assert(Ty); + + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DIObjCProperty *Ty) { + assert(Ty); + + addSourceLine(Die, Ty->getLine(), Ty->getFilename(), Ty->getDirectory()); +} + +void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) { + addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory()); +} + +bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits, unsigned OffsetInBits) { + DIEDwarfExpression Expr(*Asm, *this, TheDie); + Expr.AddMachineRegPiece(Reg, SizeInBits, OffsetInBits); + return true; +} + +bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg, + int64_t Offset) { + DIEDwarfExpression Expr(*Asm, *this, TheDie); + return Expr.AddMachineRegIndirect(Reg, Offset); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. + + The struct will look something like: + + struct __Block_byref_x_VarName { + ... <various fields> + struct __Block_byref_x_VarName *forwarding; + ... 
<various other fields> + SomeType VarName; + ... <maybe more fields> + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst <forward_fld_offset> + DW_OP_deref + DW_OP_plus_uconst <varName_fld_offset> + + That is what this function does. */ + +void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location) { + const DIType *Ty = DV.getType(); + const DIType *TmpTy = Ty; + uint16_t Tag = Ty->getTag(); + bool isPointer = false; + + StringRef varName = DV.getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + auto *DTy = cast<DIDerivedType>(Ty); + TmpTy = resolve(DTy->getBaseType()); + isPointer = true; + } + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. + DINodeArray Fields = cast<DICompositeType>(TmpTy)->getElements(); + const DIDerivedType *varField = nullptr; + const DIDerivedType *forwardingField = nullptr; + + for (unsigned i = 0, N = Fields.size(); i < N; ++i) { + auto *DT = cast<DIDerivedType>(Fields[i]); + StringRef fieldName = DT->getName(); + if (fieldName == "__forwarding") + forwardingField = DT; + else if (fieldName == varName) + varField = DT; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = forwardingField->getOffsetInBits() >> 3; + unsigned varFieldOffset = varField->getOffsetInBits() >> 2; + + // Decode the original location, and use that as the start of the byref + // variable's location. + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + + bool validReg; + if (Location.isReg()) + validReg = addRegisterOpPiece(*Loc, Location.getReg()); + else + validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset()); + + if (!validReg) + return; + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. + if (forwardingFieldOffset > 0) { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. 
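+  // For illustration, in the pointer case the expression appended above reads:
+  //   DW_OP_deref, DW_OP_plus_uconst <__forwarding offset>,
+  //   DW_OP_deref, DW_OP_plus_uconst <VarName offset>
+  // with a DW_OP_plus_uconst pair omitted whenever its offset is zero.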
+ addBlock(Die, Attribute, Loc); +} + +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) { + if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + // FIXME: Enums without a fixed underlying type have unknown signedness + // here, leading to incorrectly emitted constants. + if (CTy->getTag() == dwarf::DW_TAG_enumeration_type) + return false; + + // (Pieces of) aggregate types that get hacked apart by SROA may be + // represented by a constant. Encode them as unsigned bytes. + return true; + } + + if (auto *DTy = dyn_cast<DIDerivedType>(Ty)) { + dwarf::Tag T = (dwarf::Tag)Ty->getTag(); + // Encode pointer constants as unsigned bytes. This is used at least for + // null pointer constant emission. + // FIXME: reference and rvalue_reference /probably/ shouldn't be allowed + // here, but accept them for now due to a bug in SROA producing bogus + // dbg.values. + if (T == dwarf::DW_TAG_pointer_type || + T == dwarf::DW_TAG_ptr_to_member_type || + T == dwarf::DW_TAG_reference_type || + T == dwarf::DW_TAG_rvalue_reference_type) + return true; + assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type || + T == dwarf::DW_TAG_volatile_type || + T == dwarf::DW_TAG_restrict_type); + DITypeRef Deriv = DTy->getBaseType(); + assert(Deriv && "Expected valid base type"); + return isUnsignedDIType(DD, DD->resolve(Deriv)); + } + + auto *BTy = cast<DIBasicType>(Ty); + unsigned Encoding = BTy->getEncoding(); + assert((Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_signed || + Encoding == dwarf::DW_ATE_signed_char || + Encoding == dwarf::DW_ATE_float || Encoding == dwarf::DW_ATE_UTF || + Encoding == dwarf::DW_ATE_boolean || + (Ty->getTag() == dwarf::DW_TAG_unspecified_type && + Ty->getName() == "decltype(nullptr)")) && + "Unsupported encoding"); + return Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_UTF || Encoding == dwarf::DW_ATE_boolean || + Ty->getTag() == dwarf::DW_TAG_unspecified_type; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, const DIDerivedType *Ty) { + unsigned Tag = Ty->getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty->getSizeInBits(); + + auto *BaseType = DD->resolve(Ty->getBaseType()); + + assert(BaseType && "Unexpected invalid base type"); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType->getTag() == dwarf::DW_TAG_reference_type || + BaseType->getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty->getSizeInBits(); + + if (auto *DT = dyn_cast<DIDerivedType>(BaseType)) + return getBaseTypeSize(DD, DT); + + return BaseType->getSizeInBits(); +} + +void DwarfUnit::addConstantFPValue(DIE &Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. 
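+  // (Illustrative example: the 32-bit float 1.0f bitcasts to 0x3F800000 and is
+  // emitted on a little-endian target as the byte sequence 00 00 80 3F.)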
+ const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char *)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(*Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +void DwarfUnit::addConstantFPValue(DIE &Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); +} + +void DwarfUnit::addConstantValue(DIE &Die, const ConstantInt *CI, + const DIType *Ty) { + addConstantValue(Die, CI->getValue(), Ty); +} + +void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO, + const DIType *Ty) { + assert(MO.isImm() && "Invalid machine operand!"); + + addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm()); +} + +void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) { + // FIXME: This is a bit conservative/simple - it emits negative values always + // sign extended to 64 bits rather than minimizing the number of bytes. + addUInt(Die, dwarf::DW_AT_const_value, + Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata, Val); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty) { + addConstantValue(Die, Val, isUnsignedDIType(DD, Ty)); +} + +void DwarfUnit::addConstantValue(DIE &Die, const APInt &Val, bool Unsigned) { + unsigned CIBitWidth = Val.getBitWidth(); + if (CIBitWidth <= 64) { + addConstantValue(Die, Unsigned, + Unsigned ? Val.getZExtValue() : Val.getSExtValue()); + return; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock; + + // Get the raw data form of the large APInt. + const uint64_t *Ptr64 = Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getDataLayout().isLittleEndian(); + + // Output the constant to DWARF one byte at a time. + for (int i = 0; i < NumBytes; i++) { + uint8_t c; + if (LittleEndian) + c = Ptr64[i / 8] >> (8 * (i & 7)); + else + c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); + addUInt(*Block, dwarf::DW_FORM_data1, c); + } + + addBlock(Die, dwarf::DW_AT_const_value, Block); +} + +void DwarfUnit::addLinkageName(DIE &Die, StringRef LinkageName) { + if (!LinkageName.empty() && DD->useLinkageNames()) + addString(Die, + DD->getDwarfVersion() >= 4 ? dwarf::DW_AT_linkage_name + : dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); +} + +void DwarfUnit::addTemplateParams(DIE &Buffer, DINodeArray TParams) { + // Add template parameters. 
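+  // Each element is either a DITemplateTypeParameter or a
+  // DITemplateValueParameter; anything else is silently skipped.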
+ for (const auto *Element : TParams) { + if (auto *TTP = dyn_cast<DITemplateTypeParameter>(Element)) + constructTemplateTypeParameterDIE(Buffer, TTP); + else if (auto *TVP = dyn_cast<DITemplateValueParameter>(Element)) + constructTemplateValueParameterDIE(Buffer, TVP); + } +} + +DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) { + if (!Context || isa<DIFile>(Context)) + return &getUnitDie(); + if (auto *T = dyn_cast<DIType>(Context)) + return getOrCreateTypeDIE(T); + if (auto *NS = dyn_cast<DINamespace>(Context)) + return getOrCreateNameSpace(NS); + if (auto *SP = dyn_cast<DISubprogram>(Context)) + return getOrCreateSubprogramDIE(SP); + if (auto *M = dyn_cast<DIModule>(Context)) + return getOrCreateModule(M); + return getDIE(Context); +} + +DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) { + auto *Context = resolve(Ty->getScope()); + DIE *ContextDIE = getOrCreateContextDIE(Context); + + if (DIE *TyDIE = getDIE(Ty)) + return TyDIE; + + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); + + constructTypeDIE(TyDIE, cast<DICompositeType>(Ty)); + + if (!Ty->isExternalTypeRef()) + updateAcceleratorTables(Context, Ty, TyDIE); + return &TyDIE; +} + +DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) { + if (!TyNode) + return nullptr; + + auto *Ty = cast<DIType>(TyNode); + assert(Ty == resolve(Ty->getRef()) && + "type was not uniqued, possible ODR violation."); + + // DW_TAG_restrict_type is not supported in DWARF2 + if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2) + return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType())); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + auto *Context = resolve(Ty->getScope()); + DIE *ContextDIE = getOrCreateContextDIE(Context); + assert(ContextDIE); + + if (DIE *TyDIE = getDIE(Ty)) + return TyDIE; + + // Create new type. + DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty); + + updateAcceleratorTables(Context, Ty, TyDIE); + + if (auto *BT = dyn_cast<DIBasicType>(Ty)) + constructTypeDIE(TyDIE, BT); + else if (auto *STy = dyn_cast<DISubroutineType>(Ty)) + constructTypeDIE(TyDIE, STy); + else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) { + if (GenerateDwarfTypeUnits && !Ty->isForwardDecl()) + if (MDString *TypeId = CTy->getRawIdentifier()) { + DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy); + // Skip updating the accelerator tables since this is not the full type. + return &TyDIE; + } + constructTypeDIE(TyDIE, CTy); + } else { + constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty)); + } + + return &TyDIE; +} + +void DwarfUnit::updateAcceleratorTables(const DIScope *Context, + const DIType *Ty, const DIE &TyDIE) { + if (!Ty->getName().empty() && !Ty->isForwardDecl()) { + bool IsImplementation = 0; + if (auto *CT = dyn_cast<DICompositeType>(Ty)) { + // A runtime language of 0 actually means C/C++ and that any + // non-negative value is some version of Objective-C/C++. + IsImplementation = CT->getRuntimeLang() == 0 || CT->isObjcClassComplete(); + } + unsigned Flags = IsImplementation ? 
dwarf::DW_FLAG_type_implementation : 0; + DD->addAccelType(Ty->getName(), TyDIE, Flags); + + if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) || + isa<DINamespace>(Context)) + addGlobalType(Ty, TyDIE, Context); + } +} + +void DwarfUnit::addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); + addDIEEntry(Entity, Attribute, DIEEntry(*getOrCreateTypeDIE(Ty))); +} + +std::string DwarfUnit::getParentContextString(const DIScope *Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector<const DIScope *, 1> Parents; + while (!isa<DICompileUnit>(Context)) { + Parents.push_back(Context); + if (Context->getScope()) + Context = resolve(Context->getScope()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (const DIScope *Ctx : make_range(Parents.rbegin(), Parents.rend())) { + StringRef Name = Ctx->getName(); + if (Name.empty() && isa<DINamespace>(Ctx)) + Name = "(anonymous namespace)"; + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIBasicType *BTy) { + // Get core information. + StringRef Name = BTy->getName(); + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + // An unspecified type only has a name attribute. + if (BTy->getTag() == dwarf::DW_TAG_unspecified_type) + return; + + addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy->getEncoding()); + + uint64_t Size = BTy->getSizeInBits() >> 3; + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) { + // Get core information. + StringRef Name = DTy->getName(); + uint64_t Size = DTy->getSizeInBits() >> 3; + uint16_t Tag = Buffer.getTag(); + + // Map to main type, void will not have a type. + const DIType *FromTy = resolve(DTy->getBaseType()); + if (FromTy) + addType(Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size && Tag != dwarf::DW_TAG_pointer_type + && Tag != dwarf::DW_TAG_ptr_to_member_type + && Tag != dwarf::DW_TAG_reference_type + && Tag != dwarf::DW_TAG_rvalue_reference_type) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + + if (Tag == dwarf::DW_TAG_ptr_to_member_type) + addDIEEntry( + Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType()))); + // Add source line info if available and TyDesc is not a forward declaration. 
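+  // (Source location is deliberately omitted for forward declarations.)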
+ if (!DTy->isForwardDecl()) + addSourceLine(Buffer, DTy); +} + +void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) { + for (unsigned i = 1, N = Args.size(); i < N; ++i) { + const DIType *Ty = resolve(Args[i]); + if (!Ty) { + assert(i == N-1 && "Unspecified parameter must be the last argument"); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); + } else { + DIE &Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); + addType(Arg, Ty); + if (Ty->isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } + } +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) { + // Add return type. A void return won't have a type. + auto Elements = cast<DISubroutineType>(CTy)->getTypeArray(); + if (Elements.size()) + if (auto RTy = resolve(Elements[0])) + addType(Buffer, RTy); + + bool isPrototyped = true; + if (Elements.size() == 2 && !Elements[1]) + isPrototyped = false; + + constructSubprogramArguments(Buffer, Elements); + + // Add prototype flag if we're dealing with a C language and the function has + // been prototyped. + uint16_t Language = getLanguage(); + if (isPrototyped && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(Buffer, dwarf::DW_AT_prototyped); + + if (CTy->isLValueReference()) + addFlag(Buffer, dwarf::DW_AT_reference); + + if (CTy->isRValueReference()) + addFlag(Buffer, dwarf::DW_AT_rvalue_reference); +} + +void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isExternalTypeRef()) { + StringRef Identifier = CTy->getIdentifier(); + assert(!Identifier.empty() && "external type ref without identifier"); + addFlag(Buffer, dwarf::DW_AT_declaration); + return addDIETypeSignature(Buffer, dwarf::DW_AT_signature, Identifier); + } + + // Add name if not anonymous or intermediate type. + StringRef Name = CTy->getName(); + + uint64_t Size = CTy->getSizeInBits() >> 3; + uint16_t Tag = Buffer.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, CTy); + break; + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. 
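+    // Each element becomes (or contributes to) a child DIE: member functions,
+    // friends, static and non-static data members, and Objective-C properties
+    // are each handled separately below.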
+ DINodeArray Elements = CTy->getElements(); + for (const auto *Element : Elements) { + if (!Element) + continue; + if (auto *SP = dyn_cast<DISubprogram>(Element)) + getOrCreateSubprogramDIE(SP); + else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) { + if (DDTy->getTag() == dwarf::DW_TAG_friend) { + DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend); + } else if (DDTy->isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } + } else if (auto *Property = dyn_cast<DIObjCProperty>(Element)) { + DIE &ElemDie = createAndAddDIE(Property->getTag(), Buffer); + StringRef PropertyName = Property->getName(); + addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); + if (Property->getType()) + addType(ElemDie, resolve(Property->getType())); + addSourceLine(ElemDie, Property); + StringRef GetterName = Property->getGetterName(); + if (!GetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_getter, GetterName); + StringRef SetterName = Property->getSetterName(); + if (!SetterName.empty()) + addString(ElemDie, dwarf::DW_AT_APPLE_property_setter, SetterName); + if (unsigned PropertyAttributes = Property->getAttributes()) + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); + } + } + + if (CTy->isAppleBlockExtension()) + addFlag(Buffer, dwarf::DW_AT_APPLE_block); + + // This is outside the DWARF spec, but GDB expects a DW_AT_containing_type + // inside C++ composite types to point to the base class with the vtable. + if (auto *ContainingType = + dyn_cast_or_null<DICompositeType>(resolve(CTy->getVTableHolder()))) + addDIEEntry(Buffer, dwarf::DW_AT_containing_type, + *getOrCreateTypeDIE(ContainingType)); + + if (CTy->isObjcClassComplete()) + addFlag(Buffer, dwarf::DW_AT_APPLE_objc_complete_type); + + // Add template parameters to a class, structure or union types. + // FIXME: The support isn't in the metadata for this yet. + if (Tag == dwarf::DW_TAG_class_type || + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + addTemplateParams(Buffer, CTy->getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(Buffer, dwarf::DW_AT_name, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_union_type) { + // Add size if non-zero (derived types might be zero-sized.) + // TODO: Do we care about size for enum forward declarations? + if (Size) + addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size); + else if (!CTy->isForwardDecl()) + // Add zero size if it is not a forward declaration. + addUInt(Buffer, dwarf::DW_AT_byte_size, None, 0); + + // If we're a forward decl, say so. + if (CTy->isForwardDecl()) + addFlag(Buffer, dwarf::DW_AT_declaration); + + // Add source line info if available. + if (!CTy->isForwardDecl()) + addSourceLine(Buffer, CTy); + + // No harm in adding the runtime language to the declaration. + unsigned RLang = CTy->getRuntimeLang(); + if (RLang) + addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); + } +} + +void DwarfUnit::constructTemplateTypeParameterDIE( + DIE &Buffer, const DITemplateTypeParameter *TP) { + DIE &ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. 
+ if (TP->getType()) + addType(ParamDIE, resolve(TP->getType())); + if (!TP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP->getName()); +} + +void DwarfUnit::constructTemplateValueParameterDIE( + DIE &Buffer, const DITemplateValueParameter *VP) { + DIE &ParamDIE = createAndAddDIE(VP->getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. + if (VP->getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP->getType())); + if (!VP->getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP->getName()); + if (Metadata *Val = VP->getValue()) { + if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val)) + addConstantValue(ParamDIE, CI, resolve(VP->getType())); + else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) { + // For declaration non-type template parameters (such as global values and + // functions) + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addOpAddress(*Loc, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of the + // parameter, rather than a pointer to it. + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Loc); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (VP->getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + addTemplateParams(ParamDIE, cast<MDTuple>(Val)); + } + } +} + +DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS->getScope()); + + if (DIE *NDie = getDIE(NS)) + return NDie; + DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + + StringRef Name = NS->getName(); + if (!Name.empty()) + addString(NDie, dwarf::DW_AT_name, NS->getName()); + else + Name = "(anonymous namespace)"; + DD->addAccelNamespace(Name, NDie); + addGlobalName(Name, NDie, NS->getScope()); + addSourceLine(NDie, NS); + return &NDie; +} + +DIE *DwarfUnit::getOrCreateModule(const DIModule *M) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(M->getScope()); + + if (DIE *MDie = getDIE(M)) + return MDie; + DIE &MDie = createAndAddDIE(dwarf::DW_TAG_module, *ContextDIE, M); + + if (!M->getName().empty()) { + addString(MDie, dwarf::DW_AT_name, M->getName()); + addGlobalName(M->getName(), MDie, M->getScope()); + } + if (!M->getConfigurationMacros().empty()) + addString(MDie, dwarf::DW_AT_LLVM_config_macros, + M->getConfigurationMacros()); + if (!M->getIncludePath().empty()) + addString(MDie, dwarf::DW_AT_LLVM_include_path, M->getIncludePath()); + if (!M->getISysRoot().empty()) + addString(MDie, dwarf::DW_AT_LLVM_isysroot, M->getISysRoot()); + + return &MDie; +} + +DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = + Minimal ? 
&getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope())); + + if (DIE *SPDie = getDIE(SP)) + return SPDie; + + if (auto *SPDecl = SP->getDeclaration()) { + if (!Minimal) { + // Add subprogram definitions to the CU die directly. + ContextDIE = &getUnitDie(); + // Build the decl now to ensure it precedes the definition. + getOrCreateSubprogramDIE(SPDecl); + } + } + + // DW_TAG_inlined_subroutine may refer to this DIE. + DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); + + // Stop here and fill this in later, depending on whether or not this + // subprogram turns out to have inlined instances or not. + if (SP->isDefinition()) + return &SPDie; + + applySubprogramAttributes(SP, SPDie); + return &SPDie; +} + +bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP, + DIE &SPDie) { + DIE *DeclDie = nullptr; + StringRef DeclLinkageName; + if (auto *SPDecl = SP->getDeclaration()) { + DeclDie = getDIE(SPDecl); + assert(DeclDie && "This DIE should've already been constructed when the " + "definition DIE was created in " + "getOrCreateSubprogramDIE"); + DeclLinkageName = SPDecl->getLinkageName(); + unsigned DeclID = + getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory()); + unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory()); + if (DeclID != DefID) + addUInt(SPDie, dwarf::DW_AT_decl_file, None, DefID); + + if (SP->getLine() != SPDecl->getLine()) + addUInt(SPDie, dwarf::DW_AT_decl_line, None, SP->getLine()); + } + + // Add function template parameters. + addTemplateParams(SPDie, SP->getTemplateParams()); + + // Add the linkage name if we have one and it isn't in the Decl. + StringRef LinkageName = SP->getLinkageName(); + assert(((LinkageName.empty() || DeclLinkageName.empty()) || + LinkageName == DeclLinkageName) && + "decl has a linkage name and it is different"); + if (DeclLinkageName.empty()) + addLinkageName(SPDie, LinkageName); + + if (!DeclDie) + return false; + + // Refer to the function declaration where all the other attributes will be + // found. + addDIEEntry(SPDie, dwarf::DW_AT_specification, *DeclDie); + return true; +} + +void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, + bool Minimal) { + if (!Minimal) + if (applySubprogramDefinitionAttributes(SP, SPDie)) + return; + + // Constructors and operators for anonymous aggregates do not have names. + if (!SP->getName().empty()) + addString(SPDie, dwarf::DW_AT_name, SP->getName()); + + // Skip the rest of the attributes under -gmlt to save space. + if (Minimal) + return; + + addSourceLine(SPDie, SP); + + // Add the prototype if we have a prototype and we have a C like + // language. + uint16_t Language = getLanguage(); + if (SP->isPrototyped() && + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || + Language == dwarf::DW_LANG_ObjC)) + addFlag(SPDie, dwarf::DW_AT_prototyped); + + DITypeRefArray Args; + if (const DISubroutineType *SPTy = SP->getType()) + Args = SPTy->getTypeArray(); + + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. 
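+  // (By convention element 0 of the subroutine type array is the return type,
+  // with a null entry meaning void; the formal parameters start at element 1.)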
+ if (Args.size()) + if (auto Ty = resolve(Args[0])) + addType(SPDie, Ty); + + unsigned VK = SP->getVirtuality(); + if (VK) { + addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); + DIELoc *Block = getDIELoc(); + addUInt(*Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + ContainingTypeMap.insert( + std::make_pair(&SPDie, resolve(SP->getContainingType()))); + } + + if (!SP->isDefinition()) { + addFlag(SPDie, dwarf::DW_AT_declaration); + + // Add arguments. Do not add arguments for subprogram definition. They will + // be handled while processing variables. + constructSubprogramArguments(SPDie, Args); + } + + if (SP->isArtificial()) + addFlag(SPDie, dwarf::DW_AT_artificial); + + if (!SP->isLocalToUnit()) + addFlag(SPDie, dwarf::DW_AT_external); + + if (SP->isOptimized()) + addFlag(SPDie, dwarf::DW_AT_APPLE_optimized); + + if (unsigned isa = Asm->getISAEncoding()) + addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + + if (SP->isLValueReference()) + addFlag(SPDie, dwarf::DW_AT_reference); + + if (SP->isRValueReference()) + addFlag(SPDie, dwarf::DW_AT_rvalue_reference); + + if (SP->isProtected()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (SP->isPrivate()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else if (SP->isPublic()) + addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (SP->isExplicit()) + addFlag(SPDie, dwarf::DW_AT_explicit); +} + +void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, + DIE *IndexTy) { + DIE &DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, *IndexTy); + + // The LowerBound value defines the lower bounds which is typically zero for + // C/C++. The Count value is the number of elements. Values are 64 bit. If + // Count == -1 then the array is unbounded and we do not emit + // DW_AT_lower_bound and DW_AT_count attributes. + int64_t LowerBound = SR->getLowerBound(); + int64_t DefaultLowerBound = getDefaultLowerBound(); + int64_t Count = SR->getCount(); + + if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); + + if (Count != -1) + // FIXME: An unbounded array should reference the expression that defines + // the array. + addUInt(DW_Subrange, dwarf::DW_AT_count, None, Count); +} + +DIE *DwarfUnit::getIndexTyDie() { + if (IndexTyDie) + return IndexTyDie; + // Construct an integer type to use for indexes. + IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie); + addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype"); + addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t)); + addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_unsigned); + return IndexTyDie; +} + +void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + if (CTy->isVector()) + addFlag(Buffer, dwarf::DW_AT_GNU_vector); + + // Emit the element type. + addType(Buffer, resolve(CTy->getBaseType())); + + // Get an anonymous type for index type. + // FIXME: This type should be passed down from the front end + // as different languages may have different sizes for indexes. + DIE *IdxTy = getIndexTyDie(); + + // Add subranges to array type. 
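+  // A multi-dimensional array is described by one DW_TAG_subrange_type child
+  // per dimension, each referring to the index type created above.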
+ DINodeArray Elements = CTy->getElements(); + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + // FIXME: Should this really be such a loose cast? + if (auto *Element = dyn_cast_or_null<DINode>(Elements[i])) + if (Element->getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, cast<DISubrange>(Element), IdxTy); + } +} + +void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) { + DINodeArray Elements = CTy->getElements(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.size(); i < N; ++i) { + auto *Enum = dyn_cast_or_null<DIEnumerator>(Elements[i]); + if (Enum) { + DIE &Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + StringRef Name = Enum->getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + int64_t Value = Enum->getValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Value); + } + } + const DIType *DTy = resolve(CTy->getBaseType()); + if (DTy) { + addType(Buffer, DTy); + addFlag(Buffer, dwarf::DW_AT_enum_class); + } +} + +void DwarfUnit::constructContainingTypeDIEs() { + for (auto CI = ContainingTypeMap.begin(), CE = ContainingTypeMap.end(); + CI != CE; ++CI) { + DIE &SPDie = *CI->first; + const DINode *D = CI->second; + if (!D) + continue; + DIE *NDie = getDIE(D); + if (!NDie) + continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, *NDie); + } +} + +void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) { + DIE &MemberDie = createAndAddDIE(DT->getTag(), Buffer); + StringRef Name = DT->getName(); + if (!Name.empty()) + addString(MemberDie, dwarf::DW_AT_name, Name); + + addType(MemberDie, resolve(DT->getBaseType())); + + addSourceLine(MemberDie, DT); + + if (DT->getTag() == dwarf::DW_TAG_inheritance && DT->isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. + // BaseAddr = ObAddr + *((*ObAddr) - Offset) + + DIELoc *VBaseLocationDie = new (DIEValueAllocator) DIELoc; + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_udata, DT->getOffsetInBits()); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(*VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT->getSizeInBits(); + uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t OffsetInBytes; + + if (FieldSize && Size != FieldSize) { + // Handle bitfield, assume bytes are 8 bits. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, FieldSize/8); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size); + // + // The DWARF 2 DW_AT_bit_offset is counting the bits between the most + // significant bit of the aligned storage unit containing the bit field to + // the most significan bit of the bit field. + // + // FIXME: DWARF 4 states that DW_AT_data_bit_offset (which + // counts from the beginning, regardless of endianness) should + // be used instead. + // + // + // Struct Align Align Align + // v v v v + // +-----------+-----*-----+-----*-----+-- + // | ... 
|b1|b2|b3|b4| + // +-----------+-----*-----+-----*-----+-- + // | | |<-- Size ->| | + // |<---- Offset --->| |<--->| + // | | | \_ DW_AT_bit_offset (little endian) + // | |<--->| + // |<--------->| \_ StartBitOffset = DW_AT_bit_offset (big endian) + // \ = DW_AT_data_bit_offset (biendian) + // \_ OffsetInBytes + uint64_t Offset = DT->getOffsetInBits(); + uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize; + uint64_t AlignMask = ~(Align - 1); + // The bits from the start of the storage unit to the start of the field. + uint64_t StartBitOffset = Offset - (Offset & AlignMask); + // The endian-dependent DWARF 2 offset. + uint64_t DwarfBitOffset = Asm->getDataLayout().isLittleEndian() + ? OffsetToAlignment(Offset + Size, Align) + : StartBitOffset; + + // The byte offset of the field's aligned storage unit inside the struct. + OffsetInBytes = (Offset - StartBitOffset) / 8; + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, DwarfBitOffset); + } else + // This is not a bitfield. + OffsetInBytes = DT->getOffsetInBits() / 8; + + if (DD->getDwarfVersion() <= 2) { + DIELoc *MemLocationDie = new (DIEValueAllocator) DIELoc; + addUInt(*MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(*MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } + + if (DT->isProtected()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT->isPrivate()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + // Otherwise C++ member and base classes are considered public. + else if (DT->isPublic()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + if (DT->isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, + dwarf::DW_VIRTUALITY_virtual); + + // Objective-C properties. + if (DINode *PNode = DT->getObjCProperty()) + if (DIE *PDie = getDIE(PNode)) + MemberDie.addValue(DIEValueAllocator, dwarf::DW_AT_APPLE_property, + dwarf::DW_FORM_ref4, DIEEntry(*PDie)); + + if (DT->isArtificial()) + addFlag(MemberDie, dwarf::DW_AT_artificial); +} + +DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) { + if (!DT) + return nullptr; + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope())); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + if (DIE *StaticMemberDIE = getDIE(DT)) + return StaticMemberDIE; + + DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT); + + const DIType *Ty = resolve(DT->getBaseType()); + + addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName()); + addType(StaticMemberDIE, Ty); + addSourceLine(StaticMemberDIE, DT); + addFlag(StaticMemberDIE, dwarf::DW_AT_external); + addFlag(StaticMemberDIE, dwarf::DW_AT_declaration); + + // FIXME: We could omit private if the parent is a class_type, and + // public if the parent is something else. 
+ if (DT->isProtected()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT->isPrivate()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else if (DT->isPublic()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT->getConstant())) + addConstantValue(StaticMemberDIE, CI, Ty); + if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant())) + addConstantFPValue(StaticMemberDIE, CFP); + + return &StaticMemberDIE; +} + +void DwarfUnit::emitHeader(bool UseOffsets) { + // Emit size of content not including length itself + Asm->OutStreamer->AddComment("Length of Unit"); + Asm->EmitInt32(getHeaderSize() + UnitDie.getSize()); + + Asm->OutStreamer->AddComment("DWARF version number"); + Asm->EmitInt16(DD->getDwarfVersion()); + Asm->OutStreamer->AddComment("Offset Into Abbrev. Section"); + + // We share one abbreviations table across all units so it's always at the + // start of the section. Use a relocatable offset where needed to ensure + // linking doesn't invalidate that offset. + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + Asm->emitDwarfSymbolReference(TLOF.getDwarfAbbrevSection()->getBeginSymbol(), + UseOffsets); + + Asm->OutStreamer->AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); +} + +void DwarfUnit::initSection(MCSection *Section) { + assert(!this->Section); + this->Section = Section; +} + +void DwarfTypeUnit::emitHeader(bool UseOffsets) { + DwarfUnit::emitHeader(UseOffsets); + Asm->OutStreamer->AddComment("Type Signature"); + Asm->OutStreamer->EmitIntValue(TypeSignature, sizeof(TypeSignature)); + Asm->OutStreamer->AddComment("Type DIE Offset"); + // In a skeleton type unit there is no type DIE so emit a zero offset. + Asm->OutStreamer->EmitIntValue(Ty ? Ty->getOffset() : 0, + sizeof(Ty->getOffset())); +} + +bool DwarfTypeUnit::isDwoUnit() const { + // Since there are no skeleton type units, all type units are dwo type units + // when split DWARF is being used. + return DD->useSplitDwarf(); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h new file mode 100644 index 0000000..82760bf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -0,0 +1,403 @@ +//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFUNIT_H + +#include "DwarfDebug.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/DIE.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" + +namespace llvm { + +class MachineLocation; +class MachineOperand; +class ConstantInt; +class ConstantFP; +class DbgVariable; +class DwarfCompileUnit; + +// Data structure to hold a range for range lists. +class RangeSpan { +public: + RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {} + const MCSymbol *getStart() const { return Start; } + const MCSymbol *getEnd() const { return End; } + void setEnd(const MCSymbol *E) { End = E; } + +private: + const MCSymbol *Start, *End; +}; + +class RangeSpanList { +private: + // Index for locating within the debug_range section this particular span. + MCSymbol *RangeSym; + // List of ranges. + SmallVector<RangeSpan, 2> Ranges; + +public: + RangeSpanList(MCSymbol *Sym, SmallVector<RangeSpan, 2> Ranges) + : RangeSym(Sym), Ranges(std::move(Ranges)) {} + MCSymbol *getSym() const { return RangeSym; } + const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; } + void addRange(RangeSpan Range) { Ranges.push_back(Range); } +}; + +//===----------------------------------------------------------------------===// +/// This dwarf writer support class manages information associated with a +/// source file. +class DwarfUnit { +protected: + /// A numeric ID unique among all CUs in the module + unsigned UniqueID; + + /// MDNode for the compile unit. + const DICompileUnit *CUNode; + + // All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + + /// Unit debug information entry. + DIE &UnitDie; + + /// Offset of the UnitDie from beginning of debug info section. + unsigned DebugInfoOffset; + + /// Target of Dwarf emission. + AsmPrinter *Asm; + + // Holders for some common dwarf information. + DwarfDebug *DD; + DwarfFile *DU; + + /// An anonymous type for index type. Owned by UnitDie. + DIE *IndexTyDie; + + /// Tracks the mapping of unit level debug information variables to debug + /// information entries. + DenseMap<const MDNode *, DIE *> MDNodeToDieMap; + + /// A list of all the DIEBlocks in use. + std::vector<DIEBlock *> DIEBlocks; + + /// A list of all the DIELocs in use. + std::vector<DIELoc *> DIELocs; + + /// This map is used to keep track of subprogram DIEs that need + /// DW_AT_containing_type attribute. This attribute points to a DIE that + /// corresponds to the MDNode mapped with the subprogram DIE. + DenseMap<DIE *, const DINode *> ContainingTypeMap; + + /// The section this unit will be emitted in. + MCSection *Section; + + DwarfUnit(unsigned UID, dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU); + + bool applySubprogramDefinitionAttributes(const DISubprogram *SP, DIE &SPDie); + +public: + virtual ~DwarfUnit(); + + void initSection(MCSection *Section); + + MCSection *getSection() const { + assert(Section); + return Section; + } + + // Accessors. 
+ AsmPrinter* getAsmPrinter() const { return Asm; } + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return CUNode->getSourceLanguage(); } + const DICompileUnit *getCUNode() const { return CUNode; } + DIE &getUnitDie() { return UnitDie; } + + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + + /// Return true if this compile unit has something to write out. + bool hasContent() const { return UnitDie.hasChildren(); } + + /// Get string containing language specific context for a global name. + /// + /// Walks the metadata parent chain in a language specific manner (using the + /// compile unit language) and returns it as a string. This is done at the + /// metadata level because DIEs may not currently have been added to the + /// parent context and walking the DIEs looking for names is more expensive + /// than walking the metadata. + std::string getParentContextString(const DIScope *Context) const; + + /// Add a new global name to the compile unit. + virtual void addGlobalName(StringRef Name, DIE &Die, const DIScope *Context) { + } + + /// Add a new global type to the compile unit. + virtual void addGlobalType(const DIType *Ty, const DIE &Die, + const DIScope *Context) {} + + /// Returns the DIE map slot for the specified debug variable. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + DIE *getDIE(const DINode *D) const; + + /// Returns a fresh newly allocated DIELoc. + DIELoc *getDIELoc() { return new (DIEValueAllocator) DIELoc; } + + /// Insert DIE into the map. + /// + /// We delegate the request to DwarfDebug when the MDNode can be part of the + /// type system, since DIEs for the type system can be shared across CUs and + /// the mappings are kept in DwarfDebug. + void insertDIE(const DINode *Desc, DIE *D); + + /// Add a flag that is true to the DIE. + void addFlag(DIE &Die, dwarf::Attribute Attribute); + + /// Add an unsigned integer attribute data and value. + void addUInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer); + + void addUInt(DIEValueList &Block, dwarf::Form Form, uint64_t Integer); + + /// Add an signed integer attribute data and value. + void addSInt(DIEValueList &Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer); + + void addSInt(DIELoc &Die, Optional<dwarf::Form> Form, int64_t Integer); + + /// Add a string attribute data and value. + /// + /// We always emit a reference to the string pool instead of immediate + /// strings so that DIEs have more predictable sizes. In the case of split + /// dwarf we emit an index into another table which gets us the static offset + /// into the string table. + void addString(DIE &Die, dwarf::Attribute Attribute, StringRef Str); + + /// Add a Dwarf label attribute data and value. + DIEValueList::value_iterator addLabel(DIEValueList &Die, + dwarf::Attribute Attribute, + dwarf::Form Form, + const MCSymbol *Label); + + void addLabel(DIELoc &Die, dwarf::Form Form, const MCSymbol *Label); + + /// Add an offset into a section attribute data and value. + void addSectionOffset(DIE &Die, dwarf::Attribute Attribute, uint64_t Integer); + + /// Add a dwarf op address data and value using the form given and an + /// op of either DW_FORM_addr or DW_FORM_GNU_addr_index. 
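+  /// Under split DWARF the address is referenced indirectly through the
+  /// address pool (DW_OP_GNU_addr_index) rather than emitted inline with
+  /// DW_OP_addr.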
+ void addOpAddress(DIELoc &Die, const MCSymbol *Label); + + /// Add a label delta attribute data and value. + void addLabelDelta(DIE &Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIE &Entry); + + /// Add a DIE attribute data and value. + void addDIEEntry(DIE &Die, dwarf::Attribute Attribute, DIEEntry Entry); + + /// Add a type's DW_AT_signature and set the declaration flag. + void addDIETypeSignature(DIE &Die, const DwarfTypeUnit &Type); + /// Add an attribute containing the type signature for a unique identifier. + void addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute, + StringRef Identifier); + + /// Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIELoc *Block); + + /// Add block data. + void addBlock(DIE &Die, dwarf::Attribute Attribute, DIEBlock *Block); + + /// Add location information to specified debug information entry. + void addSourceLine(DIE &Die, unsigned Line, StringRef File, + StringRef Directory); + void addSourceLine(DIE &Die, const DILocalVariable *V); + void addSourceLine(DIE &Die, const DIGlobalVariable *G); + void addSourceLine(DIE &Die, const DISubprogram *SP); + void addSourceLine(DIE &Die, const DIType *Ty); + void addSourceLine(DIE &Die, const DINamespace *NS); + void addSourceLine(DIE &Die, const DIObjCProperty *Ty); + + /// Add constant value entry in variable DIE. + void addConstantValue(DIE &Die, const MachineOperand &MO, const DIType *Ty); + void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty); + void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val); + + /// Add constant value entry in variable DIE. + void addConstantFPValue(DIE &Die, const MachineOperand &MO); + void addConstantFPValue(DIE &Die, const ConstantFP *CFP); + + /// Add a linkage name, if it isn't empty. + void addLinkageName(DIE &Die, StringRef LinkageName); + + /// Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DINodeArray TParams); + + /// Add register operand. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. + bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg, + unsigned SizeInBits = 0, unsigned OffsetInBits = 0); + + /// Add register offset. + /// \returns false if the register does not exist, e.g., because it was never + /// materialized. + bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// Start with the address based on the location provided, and generate the + /// DWARF information necessary to find the actual Block variable (navigating + /// the Block struct) based on the starting location. Add the DWARF + /// information to the die. Obsolete, please use addComplexAddress instead. + void addBlockByrefAddress(const DbgVariable &DV, DIE &Die, + dwarf::Attribute Attribute, + const MachineLocation &Location); + + /// Add a new type attribute to the specified entity. + /// + /// This takes and attribute parameter because DW_AT_friend attributes are + /// also type references. 
+ void addType(DIE &Entity, const DIType *Ty, + dwarf::Attribute Attribute = dwarf::DW_AT_type); + + DIE *getOrCreateNameSpace(const DINamespace *NS); + DIE *getOrCreateModule(const DIModule *M); + DIE *getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal = false); + + void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie, + bool Minimal = false); + + /// Find existing DIE or create new DIE for the given type. + DIE *getOrCreateTypeDIE(const MDNode *N); + + /// Get context owner's DIE. + DIE *createTypeDIE(const DICompositeType *Ty); + + /// Get context owner's DIE. + DIE *getOrCreateContextDIE(const DIScope *Context); + + /// Construct DIEs for types that contain vtables. + void constructContainingTypeDIEs(); + + /// Construct function argument DIEs. + void constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE &createAndAddDIE(unsigned Tag, DIE &Parent, const DINode *N = nullptr); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + virtual unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } + + /// Emit the header for this unit, not including the initial length field. + virtual void emitHeader(bool UseOffsets); + + virtual DwarfCompileUnit &getCU() = 0; + + void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy); + +protected: + /// Create new static data member DIE. + DIE *getOrCreateStaticMemberDIE(const DIDerivedType *DT); + + /// Look up the source ID with the given directory and source file names. If + /// none currently exists, create a new ID and insert it in the line table. + virtual unsigned getOrCreateSourceID(StringRef File, StringRef Directory) = 0; + + /// Look in the DwarfDebug map for the MDNode that corresponds to the + /// reference. + template <typename T> T *resolve(TypedDINodeRef<T> Ref) const { + return DD->resolve(Ref); + } + +private: + void constructTypeDIE(DIE &Buffer, const DIBasicType *BTy); + void constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy); + void constructTypeDIE(DIE &Buffer, const DISubroutineType *DTy); + void constructSubrangeDIE(DIE &Buffer, const DISubrange *SR, DIE *IndexTy); + void constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy); + void constructMemberDIE(DIE &Buffer, const DIDerivedType *DT); + void constructTemplateTypeParameterDIE(DIE &Buffer, + const DITemplateTypeParameter *TP); + void constructTemplateValueParameterDIE(DIE &Buffer, + const DITemplateValueParameter *TVP); + + /// Return the default lower bound for an array. + /// + /// If the DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; + + /// Get an anonymous type for index type. + DIE *getIndexTyDie(); + + /// Set D as anonymous type for index which can be reused later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// If this is a named finished type then include it in the list of types for + /// the accelerator tables. 
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty, + const DIE &TyDIE); + + virtual bool isDwoUnit() const = 0; +}; + +class DwarfTypeUnit : public DwarfUnit { + uint64_t TypeSignature; + const DIE *Ty; + DwarfCompileUnit &CU; + MCDwarfDwoLineTable *SplitLineTable; + + unsigned getOrCreateSourceID(StringRef File, StringRef Directory) override; + bool isDwoUnit() const override; + +public: + DwarfTypeUnit(unsigned UID, DwarfCompileUnit &CU, AsmPrinter *A, + DwarfDebug *DW, DwarfFile *DWU, + MCDwarfDwoLineTable *SplitLineTable = nullptr); + + void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; } + uint64_t getTypeSignature() const { return TypeSignature; } + void setType(const DIE *Ty) { this->Ty = Ty; } + + /// Emit the header for this unit, not including the initial length field. + void emitHeader(bool UseOffsets) override; + unsigned getHeaderSize() const override { + return DwarfUnit::getHeaderSize() + sizeof(uint64_t) + // Type Signature + sizeof(uint32_t); // Type DIE Offset + } + DwarfCompileUnit &getCU() override { return CU; } +}; +} // end llvm namespace +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp new file mode 100644 index 0000000..e24dcb1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -0,0 +1,689 @@ +//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. +// +//===----------------------------------------------------------------------===// + +#include "EHStreamer.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +using namespace llvm; + +EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} + +EHStreamer::~EHStreamer() {} + +/// How many leading type ids two landing pads have in common. +unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R) { + const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; + unsigned LSize = LIds.size(), RSize = RIds.size(); + unsigned MinSize = LSize < RSize ? LSize : RSize; + unsigned Count = 0; + + for (; Count != MinSize; ++Count) + if (LIds[Count] != RIds[Count]) + return Count; + + return Count; +} + +/// Compute the actions table and gather the first action index for each landing +/// pad site. +unsigned EHStreamer:: +computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions) { + + // The action table follows the call-site table in the LSDA. The individual + // records are of two types: + // + // * Catch clause + // * Exception specification + // + // The two record kinds have the same format, with only small differences. 
+ // They are distinguished by the "switch value" field: Catch clauses + // (TypeInfos) have strictly positive switch values, and exception + // specifications (FilterIds) have strictly negative switch values. Value 0 + // indicates a catch-all clause. + // + // Negative type IDs index into FilterIds. Positive type IDs index into + // TypeInfos. The value written for a positive type ID is just the type ID + // itself. For a negative type ID, however, the value written is the + // (negative) byte offset of the corresponding FilterIds entry. The byte + // offset is usually equal to the type ID (because the FilterIds entries are + // written using a variable width encoding, which outputs one byte per entry + // as long as the value written is not too large) but can differ. This kind + // of complication does not occur for positive type IDs because type infos are + // output using a fixed width encoding. FilterOffsets[i] holds the byte + // offset corresponding to FilterIds[i]. + + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + SmallVector<int, 16> FilterOffsets; + FilterOffsets.reserve(FilterIds.size()); + int Offset = -1; + + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I != E; ++I) { + FilterOffsets.push_back(Offset); + Offset -= getULEB128Size(*I); + } + + FirstActions.reserve(LandingPads.size()); + + int FirstAction = 0; + unsigned SizeActions = 0; + const LandingPadInfo *PrevLPI = nullptr; + + for (SmallVectorImpl<const LandingPadInfo *>::const_iterator + I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { + const LandingPadInfo *LPI = *I; + const std::vector<int> &TypeIds = LPI->TypeIds; + unsigned NumShared = PrevLPI ? sharedTypeIDs(LPI, PrevLPI) : 0; + unsigned SizeSiteActions = 0; + + if (NumShared < TypeIds.size()) { + unsigned SizeAction = 0; + unsigned PrevAction = (unsigned)-1; + + if (NumShared) { + unsigned SizePrevIds = PrevLPI->TypeIds.size(); + assert(Actions.size()); + PrevAction = Actions.size() - 1; + SizeAction = getSLEB128Size(Actions[PrevAction].NextAction) + + getSLEB128Size(Actions[PrevAction].ValueForTypeID); + + for (unsigned j = NumShared; j != SizePrevIds; ++j) { + assert(PrevAction != (unsigned)-1 && "PrevAction is invalid!"); + SizeAction -= getSLEB128Size(Actions[PrevAction].ValueForTypeID); + SizeAction += -Actions[PrevAction].NextAction; + PrevAction = Actions[PrevAction].Previous; + } + } + + // Compute the actions. + for (unsigned J = NumShared, M = TypeIds.size(); J != M; ++J) { + int TypeID = TypeIds[J]; + assert(-1 - TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); + int ValueForTypeID = + isFilterEHSelector(TypeID) ? FilterOffsets[-1 - TypeID] : TypeID; + unsigned SizeTypeID = getSLEB128Size(ValueForTypeID); + + int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; + SizeAction = SizeTypeID + getSLEB128Size(NextAction); + SizeSiteActions += SizeAction; + + ActionEntry Action = { ValueForTypeID, NextAction, PrevAction }; + Actions.push_back(Action); + PrevAction = Actions.size() - 1; + } + + // Record the first action of the landing pad site. + FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; + } // else identical - re-use previous FirstAction + + // Information used when created the call-site table. The action record + // field of the call site record is the offset of the first associated + // action record, relative to the start of the actions table. 
This value is + // biased by 1 (1 indicating the start of the actions table), and 0 + // indicates that there are no actions. + FirstActions.push_back(FirstAction); + + // Compute this sites contribution to size. + SizeActions += SizeSiteActions; + + PrevLPI = LPI; + } + + return SizeActions; +} + +/// Return `true' if this is a call to a function marked `nounwind'. Return +/// `false' otherwise. +bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { + assert(MI->isCall() && "This should be a call instruction!"); + + bool MarkedNoUnwind = false; + bool SawFunc = false; + + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + + if (!MO.isGlobal()) continue; + + const Function *F = dyn_cast<Function>(MO.getGlobal()); + if (!F) continue; + + if (SawFunc) { + // Be conservative. If we have more than one function operand for this + // call, then we can't make the assumption that it's the callee and + // not a parameter to the call. + // + // FIXME: Determine if there's a way to say that `F' is the callee or + // parameter. + MarkedNoUnwind = false; + break; + } + + MarkedNoUnwind = F->doesNotThrow(); + SawFunc = true; + } + + return MarkedNoUnwind; +} + +void EHStreamer::computePadMap( + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap) { + // Invokes and nounwind calls have entries in PadMap (due to being bracketed + // by try-range labels when lowered). Ordinary calls do not, so appropriate + // try-ranges for them need be deduced so we can put them in the LSDA. + for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { + const LandingPadInfo *LandingPad = LandingPads[i]; + for (unsigned j = 0, E = LandingPad->BeginLabels.size(); j != E; ++j) { + MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; + assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); + PadRange P = { i, j }; + PadMap[BeginLabel] = P; + } + } +} + +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + const SmallVectorImpl<unsigned> &FirstActions) { + RangeMapType PadMap; + computePadMap(LandingPads, PadMap); + + // The end label of the previous invoke or nounwind try-range. + MCSymbol *LastLabel = nullptr; + + // Whether there is a potentially throwing instruction (currently this means + // an ordinary call) between the end of the previous try-range and now. + bool SawPotentiallyThrowing = false; + + // Whether the last CallSite entry was for an invoke. + bool PreviousIsInvoke = false; + + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + + // Visit all instructions in order of address. + for (const auto &MBB : *Asm->MF) { + for (const auto &MI : MBB) { + if (!MI.isEHLabel()) { + if (MI.isCall()) + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); + continue; + } + + // End of the previous try-range? 
+ MCSymbol *BeginLabel = MI.getOperand(0).getMCSymbol(); + if (BeginLabel == LastLabel) + SawPotentiallyThrowing = false; + + // Beginning of a new try-range? + RangeMapType::const_iterator L = PadMap.find(BeginLabel); + if (L == PadMap.end()) + // Nope, it was just some random label. + continue; + + const PadRange &P = L->second; + const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; + assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && + "Inconsistent landing pad map!"); + + // For Dwarf exception handling (SjLj handling doesn't use this). If some + // instruction between the previous try-range and this one may throw, + // create a call-site entry with no landing pad for the region between the + // try-ranges. + if (SawPotentiallyThrowing && Asm->MAI->usesCFIForEH()) { + CallSiteEntry Site = { LastLabel, BeginLabel, nullptr, 0 }; + CallSites.push_back(Site); + PreviousIsInvoke = false; + } + + LastLabel = LandingPad->EndLabels[P.RangeIndex]; + assert(BeginLabel && LastLabel && "Invalid landing pad!"); + + if (!LandingPad->LandingPadLabel) { + // Create a gap. + PreviousIsInvoke = false; + } else { + // This try-range is for an invoke. + CallSiteEntry Site = { + BeginLabel, + LastLabel, + LandingPad, + FirstActions[P.PadIndex] + }; + + // Try to merge with the previous call-site. SJLJ doesn't do this + if (PreviousIsInvoke && !IsSJLJ) { + CallSiteEntry &Prev = CallSites.back(); + if (Site.LPad == Prev.LPad && Site.Action == Prev.Action) { + // Extend the range of the previous entry. + Prev.EndLabel = Site.EndLabel; + continue; + } + } + + // Otherwise, create a new call-site. + if (!IsSJLJ) + CallSites.push_back(Site); + else { + // SjLj EH must maintain the call sites in the order assigned + // to them by the SjLjPrepare pass. + unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel); + if (CallSites.size() < SiteNo) + CallSites.resize(SiteNo); + CallSites[SiteNo - 1] = Site; + } + PreviousIsInvoke = true; + } + } + } + + // If some instruction between the previous try-range and the end of the + // function may throw, create a call-site entry with no landing pad for the + // region following the try-range. + if (SawPotentiallyThrowing && !IsSJLJ && LastLabel != nullptr) { + CallSiteEntry Site = { LastLabel, nullptr, nullptr, 0 }; + CallSites.push_back(Site); + } +} + +/// Emit landing pads and actions. +/// +/// The general organization of the table is complex, but the basic concepts are +/// easy. First there is a header which describes the location and organization +/// of the three components that follow. +/// +/// 1. The landing pad site information describes the range of code covered by +/// the try. In our case it's an accumulation of the ranges covered by the +/// invokes in the try. There is also a reference to the landing pad that +/// handles the exception once processed. Finally an index into the actions +/// table. +/// 2. The action table, in our case, is composed of pairs of type IDs and next +/// action offset. Starting with the action index from the landing pad +/// site, each type ID is checked for a match to the current exception. If +/// it matches then the exception and type id are passed on to the landing +/// pad. Otherwise the next action is looked up. This chain is terminated +/// with a next action of zero. If no type id is found then the frame is +/// unwound and handling continues. +/// 3. Type ID table contains references to all the C++ typeinfo for all +/// catches in the function. This tables is reverse indexed base 1. 
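To make point 2 of the comment above more concrete, the following is a minimal, hypothetical sketch (not part of this file or of the LLVM tree) of how a personality routine conceptually follows an action chain once it has the first action index for a landing pad site. It uses a plain in-memory vector instead of the SLEB128-encoded records emitted by this function, array indices instead of self-relative byte offsets, and integer equality instead of real type_info comparison; ActionEntrySketch and findMatchingAction are illustrative names only.

#include <vector>

// Hypothetical, already-decoded form of one action record. The emitted table
// instead stores (SLEB128 type value, SLEB128 next-action offset) pairs, with
// a next-action value of 0 terminating the chain.
struct ActionEntrySketch {
  int ValueForTypeID; // > 0: catch type index, < 0: filter offset, 0: cleanup
  int NextIdx;        // index of the next record in the chain, or -1 to stop
};

// Walk the chain starting at FirstAction until a record matches the thrown
// type. Returning -1 means no match, so unwinding continues past this frame.
// Matching is reduced to integer equality purely for illustration.
static int findMatchingAction(const std::vector<ActionEntrySketch> &Chain,
                              int FirstAction, int ThrownTypeID) {
  for (int I = FirstAction; I != -1; I = Chain[I].NextIdx)
    if (Chain[I].ValueForTypeID == ThrownTypeID)
      return I; // control would transfer to the landing pad with this action
  return -1;
}
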
+void EHStreamer::emitExceptionTable() { + const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); + + // Sort the landing pads in order of their type ids. This is used to fold + // duplicate actions. + SmallVector<const LandingPadInfo *, 64> LandingPads; + LandingPads.reserve(PadInfos.size()); + + for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) + LandingPads.push_back(&PadInfos[i]); + + // Order landing pads lexicographically by type id. + std::sort(LandingPads.begin(), LandingPads.end(), + [](const LandingPadInfo *L, + const LandingPadInfo *R) { return L->TypeIds < R->TypeIds; }); + + // Compute the actions table and gather the first action index for each + // landing pad site. + SmallVector<ActionEntry, 32> Actions; + SmallVector<unsigned, 64> FirstActions; + unsigned SizeActions = + computeActionsTable(LandingPads, Actions, FirstActions); + + // Compute the call-site table. + SmallVector<CallSiteEntry, 64> CallSites; + computeCallSiteTable(CallSites, LandingPads, FirstActions); + + // Final tallies. + + // Call sites. + bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; + bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true; + + unsigned CallSiteTableLength; + if (IsSJLJ) + CallSiteTableLength = 0; + else { + unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4 + unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4 + unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4 + CallSiteTableLength = + CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize); + } + + for (unsigned i = 0, e = CallSites.size(); i < e; ++i) { + CallSiteTableLength += getULEB128Size(CallSites[i].Action); + if (IsSJLJ) + CallSiteTableLength += getULEB128Size(i); + } + + // Type infos. + MCSection *LSDASection = Asm->getObjFileLowering().getLSDASection(); + unsigned TTypeEncoding; + unsigned TypeFormatSize; + + if (!HaveTTData) { + // For SjLj exceptions, if there is no TypeInfo, then we just explicitly say + // that we're omitting that bit. + TTypeEncoding = dwarf::DW_EH_PE_omit; + // dwarf::DW_EH_PE_absptr + TypeFormatSize = Asm->getDataLayout().getPointerSize(); + } else { + // Okay, we have actual filters or typeinfos to emit. As such, we need to + // pick a type encoding for them. We're about to emit a list of pointers to + // typeinfo objects at the end of the LSDA. However, unless we're in static + // mode, this reference will require a relocation by the dynamic linker. + // + // Because of this, we have a couple of options: + // + // 1) If we are in -static mode, we can always use an absolute reference + // from the LSDA, because the static linker will resolve it. + // + // 2) Otherwise, if the LSDA section is writable, we can output the direct + // reference to the typeinfo and allow the dynamic linker to relocate + // it. Since it is in a writable section, the dynamic linker won't + // have a problem. + // + // 3) Finally, if we're in PIC mode and the LDSA section isn't writable, + // we need to use some form of indirection. For example, on Darwin, + // we can output a statically-relocatable reference to a dyld stub. The + // offset to the stub is constant, but the contents are in a section + // that is updated by the dynamic linker. This is easy enough, but we + // need to tell the personality function of the unwinder to indirect + // through the dyld stub. 
+ // + // FIXME: When (3) is actually implemented, we'll have to emit the stubs + // somewhere. This predicate should be moved to a shared location that is + // in target-independent code. + // + TTypeEncoding = Asm->getObjFileLowering().getTTypeEncoding(); + TypeFormatSize = Asm->GetSizeOfEncodedValue(TTypeEncoding); + } + + // Begin the exception table. + // Sometimes we want not to emit the data into separate section (e.g. ARM + // EHABI). In this case LSDASection will be NULL. + if (LSDASection) + Asm->OutStreamer->SwitchSection(LSDASection); + Asm->EmitAlignment(2); + + // Emit the LSDA. + MCSymbol *GCCETSym = + Asm->OutContext.getOrCreateSymbol(Twine("GCC_except_table")+ + Twine(Asm->getFunctionNumber())); + Asm->OutStreamer->EmitLabel(GCCETSym); + Asm->OutStreamer->EmitLabel(Asm->getCurExceptionSym()); + + // Emit the LSDA header. + Asm->EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart"); + Asm->EmitEncodingByte(TTypeEncoding, "@TType"); + + // The type infos need to be aligned. GCC does this by inserting padding just + // before the type infos. However, this changes the size of the exception + // table, so you need to take this into account when you output the exception + // table size. However, the size is output using a variable length encoding. + // So by increasing the size by inserting padding, you may increase the number + // of bytes used for writing the size. If it increases, say by one byte, then + // you now need to output one less byte of padding to get the type infos + // aligned. However this decreases the size of the exception table. This + // changes the value you have to output for the exception table size. Due to + // the variable length encoding, the number of bytes used for writing the + // length may decrease. If so, you then have to increase the amount of + // padding. And so on. If you look carefully at the GCC code you will see that + // it indeed does this in a loop, going on and on until the values stabilize. + // We chose another solution: don't output padding inside the table like GCC + // does, instead output it before the table. + unsigned SizeTypes = TypeInfos.size() * TypeFormatSize; + unsigned CallSiteTableLengthSize = getULEB128Size(CallSiteTableLength); + unsigned TTypeBaseOffset = + sizeof(int8_t) + // Call site format + CallSiteTableLengthSize + // Call site table length size + CallSiteTableLength + // Call site table length + SizeActions + // Actions size + SizeTypes; + unsigned TTypeBaseOffsetSize = getULEB128Size(TTypeBaseOffset); + unsigned TotalSize = + sizeof(int8_t) + // LPStart format + sizeof(int8_t) + // TType format + (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size + TTypeBaseOffset; // TType base offset + unsigned SizeAlign = (4 - TotalSize) & 3; + + if (HaveTTData) { + // Account for any extra padding that will be added to the call site table + // length. + Asm->EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign); + SizeAlign = 0; + } + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + // SjLj Exception handling + if (IsSJLJ) { + Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); + + // Add extra padding if it wasn't added to the TType base offset. + Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + + // Emit the landing pad site information. 
+ unsigned idx = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) { + const CallSiteEntry &S = *I; + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (VerboseAsm) { + Asm->OutStreamer->AddComment(">> Call Site " + Twine(idx) + " <<"); + Asm->OutStreamer->AddComment(" On exception at call site "+Twine(idx)); + } + Asm->EmitULEB128(idx); + + // Offset of the first associated action record, relative to the start of + // the action table. This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" Action: cleanup"); + else + Asm->OutStreamer->AddComment(" Action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } else { + // Itanium LSDA exception handling + + // The call-site table is a list of all call sites that may throw an + // exception (including C++ 'throw' statements) in the procedure + // fragment. It immediately follows the LSDA header. Each entry indicates, + // for a given call, the first corresponding action record and corresponding + // landing pad. + // + // The table begins with the number of bytes, stored as an LEB128 + // compressed, unsigned integer. The records immediately follow the record + // count. They are sorted in increasing call-site address. Each record + // indicates: + // + // * The position of the call-site. + // * The position of the landing pad. + // * The first action record for that call site. + // + // A missing entry in the call-site table indicates that a call is not + // supposed to throw. + + // Emit the landing pad call site table. + Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site"); + + // Add extra padding if it wasn't added to the TType base offset. + Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign); + + unsigned Entry = 0; + for (SmallVectorImpl<CallSiteEntry>::const_iterator + I = CallSites.begin(), E = CallSites.end(); I != E; ++I) { + const CallSiteEntry &S = *I; + + MCSymbol *EHFuncBeginSym = Asm->getFunctionBegin(); + + MCSymbol *BeginLabel = S.BeginLabel; + if (!BeginLabel) + BeginLabel = EHFuncBeginSym; + MCSymbol *EndLabel = S.EndLabel; + if (!EndLabel) + EndLabel = Asm->getFunctionEnd(); + + // Offset of the call site relative to the previous call site, counted in + // number of 16-byte bundles. The first call site is counted relative to + // the start of the procedure fragment. + if (VerboseAsm) + Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<"); + Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4); + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" Call between ") + + BeginLabel->getName() + " and " + + EndLabel->getName()); + Asm->EmitLabelDifference(EndLabel, BeginLabel, 4); + + // Offset of the landing pad, counted in 16-byte bundles relative to the + // @LPStart address. + if (!S.LPad) { + if (VerboseAsm) + Asm->OutStreamer->AddComment(" has no landing pad"); + Asm->OutStreamer->EmitIntValue(0, 4/*size*/); + } else { + if (VerboseAsm) + Asm->OutStreamer->AddComment(Twine(" jumps to ") + + S.LPad->LandingPadLabel->getName()); + Asm->EmitLabelDifference(S.LPad->LandingPadLabel, EHFuncBeginSym, 4); + } + + // Offset of the first associated action record, relative to the start of + // the action table. 
This value is biased by 1 (1 indicates the start of + // the action table), and 0 indicates that there are no actions. + if (VerboseAsm) { + if (S.Action == 0) + Asm->OutStreamer->AddComment(" On action: cleanup"); + else + Asm->OutStreamer->AddComment(" On action: " + + Twine((S.Action - 1) / 2 + 1)); + } + Asm->EmitULEB128(S.Action); + } + } + + // Emit the Action Table. + int Entry = 0; + for (SmallVectorImpl<ActionEntry>::const_iterator + I = Actions.begin(), E = Actions.end(); I != E; ++I) { + const ActionEntry &Action = *I; + + if (VerboseAsm) { + // Emit comments that decode the action table. + Asm->OutStreamer->AddComment(">> Action Record " + Twine(++Entry) + " <<"); + } + + // Type Filter + // + // Used by the runtime to match the type of the thrown exception to the + // type of the catch clauses or the types in the exception specification. + if (VerboseAsm) { + if (Action.ValueForTypeID > 0) + Asm->OutStreamer->AddComment(" Catch TypeInfo " + + Twine(Action.ValueForTypeID)); + else if (Action.ValueForTypeID < 0) + Asm->OutStreamer->AddComment(" Filter TypeInfo " + + Twine(Action.ValueForTypeID)); + else + Asm->OutStreamer->AddComment(" Cleanup"); + } + Asm->EmitSLEB128(Action.ValueForTypeID); + + // Action Record + // + // Self-relative signed displacement in bytes of the next action record, + // or 0 if there is no next action record. + if (VerboseAsm) { + if (Action.NextAction == 0) { + Asm->OutStreamer->AddComment(" No further actions"); + } else { + unsigned NextAction = Entry + (Action.NextAction + 1) / 2; + Asm->OutStreamer->AddComment(" Continue to action "+Twine(NextAction)); + } + } + Asm->EmitSLEB128(Action.NextAction); + } + + emitTypeInfos(TTypeEncoding); + + Asm->EmitAlignment(2); +} + +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { + const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer->isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer->AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (const GlobalValue *GV : make_range(TypeInfos.rbegin(), + TypeInfos.rend())) { + if (VerboseAsm) + Asm->OutStreamer->AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer->AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer->AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (isFilterEHSelector(TypeID)) + Asm->OutStreamer->AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitULEB128(TypeID); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h new file mode 100644 index 0000000..c6a0e9d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -0,0 +1,138 @@ +//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_EHSTREAMER_H + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +struct LandingPadInfo; +class MachineModuleInfo; +class MachineInstr; +class MachineFunction; +class AsmPrinter; +class MCSymbol; +class MCSymbolRefExpr; + +template <typename T> +class SmallVectorImpl; + +/// Emits exception handling directives. +class LLVM_LIBRARY_VISIBILITY EHStreamer : public AsmPrinterHandler { +protected: + /// Target of directive emission. + AsmPrinter *Asm; + + /// Collected machine module information. + MachineModuleInfo *MMI; + + /// How many leading type ids two landing pads have in common. + static unsigned sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R); + + /// Structure holding a try-range and the associated landing pad. + struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; + }; + + typedef DenseMap<MCSymbol *, PadRange> RangeMapType; + + /// Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + unsigned Previous; + }; + + /// Structure describing an entry in the call-site table. + struct CallSiteEntry { + // The 'try-range' is BeginLabel .. EndLabel. + MCSymbol *BeginLabel; // Null indicates the start of the function. + MCSymbol *EndLabel; // Null indicates the end of the function. + + // LPad contains the landing pad start labels. + const LandingPadInfo *LPad; // Null indicates that there is no landing pad. + unsigned Action; + }; + + /// Compute the actions table and gather the first action index for each + /// landing pad site. + unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + void computePadMap(const SmallVectorImpl<const LandingPadInfo *> &LandingPads, + RangeMapType &PadMap); + + /// Compute the call-site table. The entry for an invoke has a try-range + /// containing the call, a non-zero landing pad and an appropriate action. + /// The entry for an ordinary call has a try-range containing the call and + /// zero for the landing pad and the action. Calls marked 'nounwind' have + /// no entry and must not be contained in the try-range of any entry - they + /// form gaps in the table. Entries must be ordered by try-range address. + void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); + + /// Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. 
Finally an index into
+  /// the actions table.
+  /// 2. The action table, in our case, is composed of pairs of type ids
+  ///    and next action offset. Starting with the action index from the
+  ///    landing pad site, each type ID is checked for a match to the current
+  ///    exception. If it matches then the exception and type id are passed
+  ///    on to the landing pad. Otherwise the next action is looked up. This
+  ///    chain is terminated with a next action of zero. If no type id is
+  ///    found the frame is unwound and handling continues.
+  /// 3. Type id table contains references to all the C++ typeinfo for all
+  ///    catches in the function. This table is reverse indexed, base 1.
+  void emitExceptionTable();
+
+  virtual void emitTypeInfos(unsigned TTypeEncoding);
+
+  // Helpers for identifying what kind of clause an EH typeid or selector
+  // corresponds to. Negative selectors are for filter clauses, the zero
+  // selector is for cleanups, and positive selectors are for catch clauses.
+  static bool isFilterEHSelector(int Selector) { return Selector < 0; }
+  static bool isCleanupEHSelector(int Selector) { return Selector == 0; }
+  static bool isCatchEHSelector(int Selector) { return Selector > 0; }
+
+public:
+  EHStreamer(AsmPrinter *A);
+  ~EHStreamer() override;
+
+  // Unused.
+  void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+  void beginInstruction(const MachineInstr *MI) override {}
+  void endInstruction() override {}
+
+  /// Return `true' if this is a call to a function marked `nounwind'. Return
+  /// `false' otherwise.
+  static bool callToNoUnwindFunction(const MachineInstr *MI);
+};
+}
+
+#endif
+
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
new file mode 100644
index 0000000..6a023b9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -0,0 +1,123 @@
+//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the compiler plugin that is used in order to emit
+// garbage collection information in a convenient layout for parsing and
+// loading in the Erlang/OTP runtime.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/CodeGen/GCs.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +class ErlangGCPrinter : public GCMetadataPrinter { +public: + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; +} + +static GCMetadataPrinterRegistry::Add<ErlangGCPrinter> + X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGCPrinter() {} + +void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + MCStreamer &OS = *AP.OutStreamer; + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); + + // Put this in a custom .note section. + OS.SwitchSection( + AP.getObjFileLowering().getContext().getELFSection(".note.gc", + ELF::SHT_PROGBITS, 0)); + + // For each function... + for (GCModuleInfo::FuncInfoVec::iterator FI = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + FI != IE; ++FI) { + GCFunctionInfo &MD = **FI; + if (MD.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_<FUNCTIONNAME>; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.EmitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label /*Hi*/, 0 /*Offset*/, 4 /*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. + GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs + ? MD.getFunction().arg_size() - RegisteredArgs + : 0; + OS.AddComment("stack arity"); + AP.EmitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.EmitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. 
+ OS.AddComment("stack index (offset / wordsize)"); + AP.EmitInt16(LI->StackOffset / IntPtrSize); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp new file mode 100644 index 0000000..c09ef6a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -0,0 +1,182 @@ +//===-- OcamlGCPrinter.cpp - Ocaml frametable emitter ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements printing the assembly code for an Ocaml frametable. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cctype> +using namespace llvm; + +namespace { + +class OcamlGCMetadataPrinter : public GCMetadataPrinter { +public: + void beginAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; + void finishAssembly(Module &M, GCModuleInfo &Info, AsmPrinter &AP) override; +}; +} + +static GCMetadataPrinterRegistry::Add<OcamlGCMetadataPrinter> + Y("ocaml", "ocaml 3.10-compatible collector"); + +void llvm::linkOcamlGCPrinter() {} + +static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { + const std::string &MId = M.getModuleIdentifier(); + + std::string SymName; + SymName += "caml"; + size_t Letter = SymName.size(); + SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); + SymName += "__"; + SymName += Id; + + // Capitalize the first letter of the module name. + SymName[Letter] = toupper(SymName[Letter]); + + SmallString<128> TmpStr; + Mangler::getNameWithPrefix(TmpStr, SymName, M.getDataLayout()); + + MCSymbol *Sym = AP.OutContext.getOrCreateSymbol(TmpStr); + + AP.OutStreamer->EmitSymbolAttribute(Sym, MCSA_Global); + AP.OutStreamer->EmitLabel(Sym); +} + +void OcamlGCMetadataPrinter::beginAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(M, AP, "code_begin"); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "data_begin"); +} + +/// emitAssembly - Print the frametable. The ocaml frametable format is thus: +/// +/// extern "C" struct align(sizeof(intptr_t)) { +/// uint16_t NumDescriptors; +/// struct align(sizeof(intptr_t)) { +/// void *ReturnAddress; +/// uint16_t FrameSize; +/// uint16_t NumLiveOffsets; +/// uint16_t LiveOffsets[NumLiveOffsets]; +/// } Descriptors[NumDescriptors]; +/// } caml${module}__frametable; +/// +/// Note that this precludes programs from stack frames larger than 64K +/// (FrameSize and LiveOffsets would overflow). FrameTablePrinter will abort if +/// either condition is detected in a function which uses the GC. 
+/// +void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info, + AsmPrinter &AP) { + unsigned IntPtrSize = M.getDataLayout().getPointerSize(); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getTextSection()); + EmitCamlGlobal(M, AP, "code_end"); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "data_end"); + + // FIXME: Why does ocaml emit this?? + AP.OutStreamer->EmitIntValue(0, IntPtrSize); + + AP.OutStreamer->SwitchSection(AP.getObjFileLowering().getDataSection()); + EmitCamlGlobal(M, AP, "frametable"); + + int NumDescriptors = 0; + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { + GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + NumDescriptors++; + } + } + + if (NumDescriptors >= 1 << 16) { + // Very rude! + report_fatal_error(" Too much descriptor for ocaml GC"); + } + AP.EmitInt16(NumDescriptors); + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(), + IE = Info.funcinfo_end(); + I != IE; ++I) { + GCFunctionInfo &FI = **I; + if (FI.getStrategy().getName() != getStrategy().getName()) + // this function is managed by some other GC + continue; + + uint64_t FrameSize = FI.getFrameSize(); + if (FrameSize >= 1 << 16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Frame size " + + Twine(FrameSize) + ">= 65536.\n" + "(" + + Twine(uintptr_t(&FI)) + ")"); + } + + AP.OutStreamer->AddComment("live roots for " + + Twine(FI.getFunction().getName())); + AP.OutStreamer->AddBlankLine(); + + for (GCFunctionInfo::iterator J = FI.begin(), JE = FI.end(); J != JE; ++J) { + size_t LiveCount = FI.live_size(J); + if (LiveCount >= 1 << 16) { + // Very rude! + report_fatal_error("Function '" + FI.getFunction().getName() + + "' is too large for the ocaml GC! " + "Live root count " + + Twine(LiveCount) + " >= 65536."); + } + + AP.OutStreamer->EmitSymbolValue(J->Label, IntPtrSize); + AP.EmitInt16(FrameSize); + AP.EmitInt16(LiveCount); + + for (GCFunctionInfo::live_iterator K = FI.live_begin(J), + KE = FI.live_end(J); + K != KE; ++K) { + if (K->StackOffset >= 1 << 16) { + // Very rude! + report_fatal_error( + "GC root stack offset is outside of fixed stack frame and out " + "of range for ocaml GC!"); + } + AP.EmitInt16(K->StackOffset); + } + + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp new file mode 100644 index 0000000..c2c0f84 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -0,0 +1,396 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files. 
+// +//===----------------------------------------------------------------------===// + +#include "WinCodeViewLineTables.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/COFF.h" + +namespace llvm { + +StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) { + assert(S); + assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) || + isa<DILexicalBlockBase>(S)) && + "Unexpected scope info"); + + auto *Scope = cast<DIScope>(S); + StringRef Dir = Scope->getDirectory(), + Filename = Scope->getFilename(); + std::string &Filepath = + DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)]; + if (!Filepath.empty()) + return Filepath; + + // Clang emits directory and relative filename info into the IR, but CodeView + // operates on full paths. We could change Clang to emit full paths too, but + // that would increase the IR size and probably not needed for other users. + // For now, just concatenate and canonicalize the path here. + if (Filename.find(':') == 1) + Filepath = Filename; + else + Filepath = (Dir + "\\" + Filename).str(); + + // Canonicalize the path. We have to do it textually because we may no longer + // have access the file in the filesystem. + // First, replace all slashes with backslashes. + std::replace(Filepath.begin(), Filepath.end(), '/', '\\'); + + // Remove all "\.\" with "\". + size_t Cursor = 0; + while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 2); + + // Replace all "\XXX\..\" with "\". Don't try too hard though as the original + // path should be well-formatted, e.g. start with a drive letter, etc. + Cursor = 0; + while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) { + // Something's wrong if the path starts with "\..\", abort. + if (Cursor == 0) + break; + + size_t PrevSlash = Filepath.rfind('\\', Cursor - 1); + if (PrevSlash == std::string::npos) + // Something's wrong, abort. + break; + + Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash); + // The next ".." might be following the one we've just erased. + Cursor = PrevSlash; + } + + // Remove all duplicate backslashes. + Cursor = 0; + while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos) + Filepath.erase(Cursor, 1); + + return Filepath; +} + +void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL, + const MachineFunction *MF) { + const MDNode *Scope = DL.getScope(); + if (!Scope) + return; + StringRef Filename = getFullFilepath(Scope); + + // Skip this instruction if it has the same file:line as the previous one. + assert(CurFn); + if (!CurFn->Instrs.empty()) { + const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()]; + if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine()) + return; + } + FileNameRegistry.add(Filename); + + MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol(); + Asm->OutStreamer->EmitLabel(MCL); + CurFn->Instrs.push_back(MCL); + InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol()); +} + +WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP) + : Asm(nullptr), CurFn(nullptr) { + MachineModuleInfo *MMI = AP->MMI; + + // If module doesn't have named metadata anchors or COFF debug section + // is not available, skip any debug info related stuff. + if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") || + !AP->getObjFileLowering().getCOFFDebugSymbolsSection()) + return; + + // Tell MMI that we have debug info. 
+ MMI->setDebugInfoAvailability(true); + Asm = AP; +} + +void WinCodeViewLineTables::endModule() { + if (FnDebugInfo.empty()) + return; + + assert(Asm != nullptr); + Asm->OutStreamer->SwitchSection( + Asm->getObjFileLowering().getCOFFDebugSymbolsSection()); + Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC); + + // The COFF .debug$S section consists of several subsections, each starting + // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length + // of the payload followed by the payload itself. The subsections are 4-byte + // aligned. + + // Emit per-function debug information. This code is extracted into a + // separate function for readability. + for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I) + emitDebugInfoForFunction(VisitedFunctions[I]); + + // This subsection holds a file index to offset in string table table. + Asm->OutStreamer->AddComment("File index to string table offset subsection"); + Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION); + size_t NumFilenames = FileNameRegistry.Infos.size(); + Asm->EmitInt32(8 * NumFilenames); + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + StringRef Filename = FileNameRegistry.Filenames[I]; + // For each unique filename, just write its offset in the string table. + Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset); + // The function name offset is not followed by any additional data. + Asm->EmitInt32(0); + } + + // This subsection holds the string table. + Asm->OutStreamer->AddComment("String table"); + Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION); + Asm->EmitInt32(FileNameRegistry.LastOffset); + // The payload starts with a null character. + Asm->EmitInt8(0); + + for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) { + // Just emit unique filenames one by one, separated by a null character. + Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]); + Asm->EmitInt8(0); + } + + // No more subsections. Fill with zeros to align the end of the section by 4. + Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0); + + clear(); +} + +static void EmitLabelDiff(MCStreamer &Streamer, + const MCSymbol *From, const MCSymbol *To, + unsigned int Size = 4) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + MCContext &Context = Streamer.getContext(); + const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context), + *ToRef = MCSymbolRefExpr::create(To, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context); + Streamer.EmitValue(AddrDelta, Size); +} + +void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) { + // For each function there is a separate subsection + // which holds the PC to file:line table. + const MCSymbol *Fn = Asm->getSymbol(GV); + assert(Fn); + + const FunctionInfo &FI = FnDebugInfo[GV]; + if (FI.Instrs.empty()) + return; + assert(FI.End && "Don't know where the function ends?"); + + StringRef GVName = GV->getName(); + StringRef FuncName; + if (auto *SP = getDISubprogram(GV)) + FuncName = SP->getDisplayName(); + + // FIXME Clang currently sets DisplayName to "bar" for a C++ + // "namespace_foo::bar" function, see PR21528. Luckily, dbghelp.dll is trying + // to demangle display names anyways, so let's just put a mangled name into + // the symbols subsection until Clang gives us what we need. + if (GVName.startswith("\01?")) + FuncName = GVName.substr(1); + // Emit a symbol subsection, required by VS2012+ to find function boundaries. 
+ MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(), + *SymbolsEnd = Asm->MMI->getContext().createTempSymbol(); + Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName)); + Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION); + EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd); + Asm->OutStreamer->EmitLabel(SymbolsBegin); + { + MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(), + *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2); + Asm->OutStreamer->EmitLabel(ProcSegmentBegin); + + Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START); + // Some bytes of this segment don't seem to be required for basic debugging, + // so just fill them with zeroes. + Asm->OutStreamer->EmitFill(12, 0); + // This is the important bit that tells the debugger where the function + // code is located and what's its size: + EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End); + Asm->OutStreamer->EmitFill(12, 0); + Asm->OutStreamer->EmitCOFFSecRel32(Fn); + Asm->OutStreamer->EmitCOFFSectionIndex(Fn); + Asm->EmitInt8(0); + // Emit the function display name as a null-terminated string. + Asm->OutStreamer->EmitBytes(FuncName); + Asm->EmitInt8(0); + Asm->OutStreamer->EmitLabel(ProcSegmentEnd); + + // We're done with this function. + Asm->EmitInt16(0x0002); + Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END); + } + Asm->OutStreamer->EmitLabel(SymbolsEnd); + // Every subsection must be aligned to a 4-byte boundary. + Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0); + + // PCs/Instructions are grouped into segments sharing the same filename. + // Pre-calculate the lengths (in instructions) of these segments and store + // them in a map for convenience. Each index in the map is the sequential + // number of the respective instruction that starts a new segment. + DenseMap<size_t, size_t> FilenameSegmentLengths; + size_t LastSegmentEnd = 0; + StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename; + for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) { + if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename) + continue; + FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd; + LastSegmentEnd = J; + PrevFilename = InstrInfo[FI.Instrs[J]].Filename; + } + FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd; + + // Emit a line table subsection, required to do PC-to-file:line lookup. + Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName)); + Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION); + MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(), + *LineTableEnd = Asm->MMI->getContext().createTempSymbol(); + EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd); + Asm->OutStreamer->EmitLabel(LineTableBegin); + + // Identify the function this subsection is for. + Asm->OutStreamer->EmitCOFFSecRel32(Fn); + Asm->OutStreamer->EmitCOFFSectionIndex(Fn); + // Insert flags after a 16-bit section index. + Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS); + + // Length of the function's code, in bytes. 
+  EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
+
+  // PC-to-linenumber lookup table:
+  MCSymbol *FileSegmentEnd = nullptr;
+
+  // The start of the last segment:
+  size_t LastSegmentStart = 0;
+
+  auto FinishPreviousChunk = [&] {
+    if (!FileSegmentEnd)
+      return;
+    for (size_t ColSegI = LastSegmentStart,
+                ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
+         ColSegI != ColSegEnd; ++ColSegI) {
+      unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
+      Asm->EmitInt16(ColumnNumber); // Start column
+      Asm->EmitInt16(ColumnNumber); // End column
+    }
+    Asm->OutStreamer->EmitLabel(FileSegmentEnd);
+  };
+
+  for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
+    MCSymbol *Instr = FI.Instrs[J];
+    assert(InstrInfo.count(Instr));
+
+    if (FilenameSegmentLengths.count(J)) {
+      // We came to the beginning of a new filename segment.
+      FinishPreviousChunk();
+      StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
+      assert(FileNameRegistry.Infos.count(CurFilename));
+      size_t IndexInStringTable =
+          FileNameRegistry.Infos[CurFilename].FilenameID;
+      // Each segment starts with the offset of the filename
+      // in the string table.
+      Asm->OutStreamer->AddComment(
+          "Segment for file '" + Twine(CurFilename) + "' begins");
+      MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
+      Asm->OutStreamer->EmitLabel(FileSegmentBegin);
+      Asm->EmitInt32(8 * IndexInStringTable);
+
+      // Number of PC records in the lookup table.
+      size_t SegmentLength = FilenameSegmentLengths[J];
+      Asm->EmitInt32(SegmentLength);
+
+      // Full size of the segment for this filename, including the prev two
+      // records.
+      FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
+      EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
+      LastSegmentStart = J;
+    }
+
+    // The first PC with the given linenumber and the linenumber itself.
+    EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
+    Asm->EmitInt32(InstrInfo[Instr].LineNumber);
+  }
+
+  FinishPreviousChunk();
+  Asm->OutStreamer->EmitLabel(LineTableEnd);
+}
+
+void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
+  assert(!CurFn && "Can't process two functions at once!");
+
+  if (!Asm || !Asm->MMI->hasDebugInfo())
+    return;
+
+  const Function *GV = MF->getFunction();
+  assert(FnDebugInfo.count(GV) == false);
+  VisitedFunctions.push_back(GV);
+  CurFn = &FnDebugInfo[GV];
+
+  // Find the end of the function prolog.
+  // FIXME: is there a simpler way to do this? Can we just search
+  // for the first instruction of the function, not the last of the prolog?
+  DebugLoc PrologEndLoc;
+  bool EmptyPrologue = true;
+  for (const auto &MBB : *MF) {
+    if (PrologEndLoc)
+      break;
+    for (const auto &MI : MBB) {
+      if (MI.isDebugValue())
+        continue;
+
+      // First known non-DBG_VALUE and non-frame setup location marks
+      // the beginning of the function body.
+      // FIXME: do we need the first subcondition?
+      if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
+        PrologEndLoc = MI.getDebugLoc();
+        break;
+      }
+      EmptyPrologue = false;
+    }
+  }
+  // Record the beginning of the function if we have a non-empty prologue.
+  if (PrologEndLoc && !EmptyPrologue) {
+    DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
+    maybeRecordLocation(FnStartDL, MF);
+  }
+}
+
+void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
+  if (!Asm || !CurFn)  // We haven't created any debug info for this function.
+ return; + + const Function *GV = MF->getFunction(); + assert(FnDebugInfo.count(GV)); + assert(CurFn == &FnDebugInfo[GV]); + + if (CurFn->Instrs.empty()) { + FnDebugInfo.erase(GV); + VisitedFunctions.pop_back(); + } else { + CurFn->End = Asm->getFunctionEnd(); + } + CurFn = nullptr; +} + +void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) { + // Ignore DBG_VALUE locations and function prologue. + if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup)) + return; + DebugLoc DL = MI->getDebugLoc(); + if (DL == PrevInstLoc || !DL) + return; + maybeRecordLocation(DL, Asm->MF); +} +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h new file mode 100644 index 0000000..78068e0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h @@ -0,0 +1,138 @@ +//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.h ----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing line tables info into COFF files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WINCODEVIEWLINETABLES_H + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Target/TargetLoweringObjectFile.h" + +namespace llvm { +/// \brief Collects and handles line tables information in a CodeView format. +class LLVM_LIBRARY_VISIBILITY WinCodeViewLineTables : public AsmPrinterHandler { + AsmPrinter *Asm; + DebugLoc PrevInstLoc; + + // For each function, store a vector of labels to its instructions, as well as + // to the end of the function. + struct FunctionInfo { + SmallVector<MCSymbol *, 10> Instrs; + MCSymbol *End; + FunctionInfo() : End(nullptr) {} + } *CurFn; + + typedef DenseMap<const Function *, FunctionInfo> FnDebugInfoTy; + FnDebugInfoTy FnDebugInfo; + // Store the functions we've visited in a vector so we can maintain a stable + // order while emitting subsections. + SmallVector<const Function *, 10> VisitedFunctions; + + // InstrInfoTy - Holds the Filename:LineNumber information for every + // instruction with a unique debug location. + struct InstrInfoTy { + StringRef Filename; + unsigned LineNumber; + unsigned ColumnNumber; + + InstrInfoTy() : LineNumber(0), ColumnNumber(0) {} + + InstrInfoTy(StringRef Filename, unsigned LineNumber, unsigned ColumnNumber) + : Filename(Filename), LineNumber(LineNumber), + ColumnNumber(ColumnNumber) {} + }; + DenseMap<MCSymbol *, InstrInfoTy> InstrInfo; + + // FileNameRegistry - Manages filenames observed while generating debug info + // by filtering out duplicates and bookkeeping the offsets in the string + // table to be generated. 
+ struct FileNameRegistryTy { + SmallVector<StringRef, 10> Filenames; + struct PerFileInfo { + size_t FilenameID, StartOffset; + }; + StringMap<PerFileInfo> Infos; + + // The offset in the string table where we'll write the next unique + // filename. + size_t LastOffset; + + FileNameRegistryTy() { + clear(); + } + + // Add Filename to the registry, if it was not observed before. + void add(StringRef Filename) { + if (Infos.count(Filename)) + return; + size_t OldSize = Infos.size(); + Infos[Filename].FilenameID = OldSize; + Infos[Filename].StartOffset = LastOffset; + LastOffset += Filename.size() + 1; + Filenames.push_back(Filename); + } + + void clear() { + LastOffset = 1; + Infos.clear(); + Filenames.clear(); + } + } FileNameRegistry; + + typedef std::map<std::pair<StringRef, StringRef>, std::string> + DirAndFilenameToFilepathMapTy; + DirAndFilenameToFilepathMapTy DirAndFilenameToFilepathMap; + StringRef getFullFilepath(const MDNode *S); + + void maybeRecordLocation(DebugLoc DL, const MachineFunction *MF); + + void clear() { + assert(CurFn == nullptr); + FileNameRegistry.clear(); + InstrInfo.clear(); + } + + void emitDebugInfoForFunction(const Function *GV); + +public: + WinCodeViewLineTables(AsmPrinter *Asm); + + void setSymbolSize(const llvm::MCSymbol *, uint64_t) override {} + + /// \brief Emit the COFF section that holds the line table information. + void endModule() override; + + /// \brief Gather pre-function debug information. + void beginFunction(const MachineFunction *MF) override; + + /// \brief Gather post-function debug information. + void endFunction(const MachineFunction *) override; + + /// \brief Process beginning of an instruction. + void beginInstruction(const MachineInstr *MI) override; + + /// \brief Process end of an instruction. + void endInstruction() override {} +}; +} // End of namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp new file mode 100644 index 0000000..4da5b58 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp @@ -0,0 +1,1237 @@ +//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing Win64 exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "WinException.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCWin64EH.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +WinException::WinException(AsmPrinter *A) : EHStreamer(A) { + // MSVC's EH tables are always composed of 32-bit words. All known 64-bit + // platforms use an imagerel32 relocation to refer to symbols. + useImageRel32 = (A->getDataLayout().getPointerSizeInBits() == 64); +} + +WinException::~WinException() {} + +/// endModule - Emit all exception information that should come after the +/// content. +void WinException::endModule() { + auto &OS = *Asm->OutStreamer; + const Module *M = MMI->getModule(); + for (const Function &F : *M) + if (F.hasFnAttribute("safeseh")) + OS.EmitCOFFSafeSEH(Asm->getSymbol(&F)); +} + +void WinException::beginFunction(const MachineFunction *MF) { + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; + + // If any landing pads survive, we need an EH table. + bool hasLandingPads = !MMI->getLandingPads().empty(); + bool hasEHFunclets = MMI->hasEHFunclets(); + + const Function *F = MF->getFunction(); + + shouldEmitMoves = Asm->needsSEHMoves(); + + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = nullptr; + if (F->hasPersonalityFn()) + Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + + bool forceEmitPersonality = + F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) && + F->needsUnwindTableEntry(); + + shouldEmitPersonality = + forceEmitPersonality || ((hasLandingPads || hasEHFunclets) && + PerEncoding != dwarf::DW_EH_PE_omit && Per); + + unsigned LSDAEncoding = TLOF.getLSDAEncoding(); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + // If we're not using CFI, we don't want the CFI or the personality, but we + // might want EH tables if we had EH pads. + if (!Asm->MAI->usesWindowsCFI()) { + shouldEmitLSDA = hasEHFunclets; + shouldEmitPersonality = false; + return; + } + + beginFunclet(MF->front(), Asm->CurrentFnSym); +} + +/// endFunction - Gather and emit post-function exception information. 
+///
+void WinException::endFunction(const MachineFunction *MF) {
+  if (!shouldEmitPersonality && !shouldEmitMoves && !shouldEmitLSDA)
+    return;
+
+  const Function *F = MF->getFunction();
+  EHPersonality Per = EHPersonality::Unknown;
+  if (F->hasPersonalityFn())
+    Per = classifyEHPersonality(F->getPersonalityFn());
+
+  // Get rid of any dead landing pads if we're not using funclets. In funclet
+  // schemes, the landing pad is not actually reachable. It only exists so
+  // that we can emit the right table data.
+  if (!isFuncletEHPersonality(Per))
+    MMI->TidyLandingPads();
+
+  endFunclet();
+
+  // endFunclet will emit the necessary .xdata tables for x64 SEH.
+  if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+    return;
+
+  if (shouldEmitPersonality || shouldEmitLSDA) {
+    Asm->OutStreamer->PushSection();
+
+    // Just switch sections to the right xdata section. This use of CurrentFnSym
+    // assumes that we only emit the LSDA when ending the parent function.
+    MCSection *XData = WinEH::UnwindEmitter::getXDataSection(Asm->CurrentFnSym,
+                                                             Asm->OutContext);
+    Asm->OutStreamer->SwitchSection(XData);
+
+    // Emit the tables appropriate to the personality function in use. If we
+    // don't recognize the personality, assume it uses an Itanium-style LSDA.
+    if (Per == EHPersonality::MSVC_Win64SEH)
+      emitCSpecificHandlerTable(MF);
+    else if (Per == EHPersonality::MSVC_X86SEH)
+      emitExceptHandlerTable(MF);
+    else if (Per == EHPersonality::MSVC_CXX)
+      emitCXXFrameHandler3Table(MF);
+    else if (Per == EHPersonality::CoreCLR)
+      emitCLRExceptionTable(MF);
+    else
+      emitExceptionTable();
+
+    Asm->OutStreamer->PopSection();
+  }
+}
+
+/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
+static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
+                                   const MachineBasicBlock *MBB) {
+  if (!MBB)
+    return nullptr;
+
+  assert(MBB->isEHFuncletEntry());
+
+  // Give catches and cleanups a name based on their parent function and
+  // their funclet entry block's number.
+  const MachineFunction *MF = MBB->getParent();
+  const Function *F = MF->getFunction();
+  StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName());
+  MCContext &Ctx = MF->getContext();
+  StringRef HandlerPrefix = MBB->isCleanupFuncletEntry() ? "dtor" : "catch";
+  return Ctx.getOrCreateSymbol("?" + HandlerPrefix + "$" +
+                               Twine(MBB->getNumber()) + "@?0?" +
+                               FuncLinkageName + "@4HA");
+}
+
+void WinException::beginFunclet(const MachineBasicBlock &MBB,
+                                MCSymbol *Sym) {
+  CurrentFuncletEntry = &MBB;
+
+  const Function *F = Asm->MF->getFunction();
+  // If a symbol was not provided for the funclet, invent one.
+  if (!Sym) {
+    Sym = getMCSymbolForMBB(Asm, &MBB);
+
+    // Describe our funclet symbol as a function with internal linkage.
+    Asm->OutStreamer->BeginCOFFSymbolDef(Sym);
+    Asm->OutStreamer->EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_STATIC);
+    Asm->OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+                                         << COFF::SCT_COMPLEX_TYPE_SHIFT);
+    Asm->OutStreamer->EndCOFFSymbolDef();
+
+    // We want our funclet's entry point to be aligned such that no nops will be
+    // present after the label.
+    Asm->EmitAlignment(std::max(Asm->MF->getAlignment(), MBB.getAlignment()),
+                       F);
+
+    // Now that we've emitted the alignment directive, point at our funclet.
+    Asm->OutStreamer->EmitLabel(Sym);
+  }
+
+  // Mark 'Sym' as starting our funclet.
+ if (shouldEmitMoves || shouldEmitPersonality) + Asm->OutStreamer->EmitWinCFIStartProc(Sym); + + if (shouldEmitPersonality) { + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + const Function *PerFn = nullptr; + + // Determine which personality routine we are using for this funclet. + if (F->hasPersonalityFn()) + PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI); + + // Classify the personality routine so that we may reason about it. + EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); + + // Do not emit a .seh_handler directive if it is a C++ cleanup funclet. + if (Per != EHPersonality::MSVC_CXX || + !CurrentFuncletEntry->isCleanupFuncletEntry()) + Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true); + } +} + +void WinException::endFunclet() { + // No funclet to process? Great, we have nothing to do. + if (!CurrentFuncletEntry) + return; + + if (shouldEmitMoves || shouldEmitPersonality) { + const Function *F = Asm->MF->getFunction(); + EHPersonality Per = EHPersonality::Unknown; + if (F->hasPersonalityFn()) + Per = classifyEHPersonality(F->getPersonalityFn()); + + // The .seh_handlerdata directive implicitly switches section, push the + // current section so that we may return to it. + Asm->OutStreamer->PushSection(); + + // Emit an UNWIND_INFO struct describing the prologue. + Asm->OutStreamer->EmitWinEHHandlerData(); + + if (Per == EHPersonality::MSVC_CXX && shouldEmitPersonality && + !CurrentFuncletEntry->isCleanupFuncletEntry()) { + // If this is a C++ catch funclet (or the parent function), + // emit a reference to the LSDA for the parent function. + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol( + Twine("$cppxdata$", FuncLinkageName)); + Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4); + } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() && + !CurrentFuncletEntry->isEHFuncletEntry()) { + // If this is the parent function in Win64 SEH, emit the LSDA immediately + // following .seh_handlerdata. + emitCSpecificHandlerTable(Asm->MF); + } + + // Switch back to the previous section now that we are done writing to + // .xdata. + Asm->OutStreamer->PopSection(); + + // Emit a .seh_endproc directive to mark the end of the function. + Asm->OutStreamer->EmitWinCFIEndProc(); + } + + // Let's make sure we don't try to end the same funclet twice. + CurrentFuncletEntry = nullptr; +} + +const MCExpr *WinException::create32bitRef(const MCSymbol *Value) { + if (!Value) + return MCConstantExpr::create(0, Asm->OutContext); + return MCSymbolRefExpr::create(Value, useImageRel32 + ? 
MCSymbolRefExpr::VK_COFF_IMGREL32 + : MCSymbolRefExpr::VK_None, + Asm->OutContext); +} + +const MCExpr *WinException::create32bitRef(const GlobalValue *GV) { + if (!GV) + return MCConstantExpr::create(0, Asm->OutContext); + return create32bitRef(Asm->getSymbol(GV)); +} + +const MCExpr *WinException::getLabelPlusOne(const MCSymbol *Label) { + return MCBinaryExpr::createAdd(create32bitRef(Label), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +const MCExpr *WinException::getOffset(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createSub( + MCSymbolRefExpr::create(OffsetOf, Asm->OutContext), + MCSymbolRefExpr::create(OffsetFrom, Asm->OutContext), Asm->OutContext); +} + +const MCExpr *WinException::getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom) { + return MCBinaryExpr::createAdd(getOffset(OffsetOf, OffsetFrom), + MCConstantExpr::create(1, Asm->OutContext), + Asm->OutContext); +} + +int WinException::getFrameIndexOffset(int FrameIndex, + const WinEHFuncInfo &FuncInfo) { + const TargetFrameLowering &TFI = *Asm->MF->getSubtarget().getFrameLowering(); + unsigned UnusedReg; + if (Asm->MAI->usesWindowsCFI()) + return TFI.getFrameIndexReferenceFromSP(*Asm->MF, FrameIndex, UnusedReg); + // For 32-bit, offsets should be relative to the end of the EH registration + // node. For 64-bit, it's relative to SP at the end of the prologue. + assert(FuncInfo.EHRegNodeEndOffset != INT_MAX); + int Offset = TFI.getFrameIndexReference(*Asm->MF, FrameIndex, UnusedReg); + Offset += FuncInfo.EHRegNodeEndOffset; + return Offset; +} + +namespace { + +/// Top-level state used to represent unwind to caller +const int NullState = -1; + +struct InvokeStateChange { + /// EH Label immediately after the last invoke in the previous state, or + /// nullptr if the previous state was the null state. + const MCSymbol *PreviousEndLabel; + + /// EH label immediately before the first invoke in the new state, or nullptr + /// if the new state is the null state. + const MCSymbol *NewStartLabel; + + /// State of the invoke following NewStartLabel, or NullState to indicate + /// the presence of calls which may unwind to caller. + int NewState; +}; + +/// Iterator that reports all the invoke state changes in a range of machine +/// basic blocks. Changes to the null state are reported whenever a call that +/// may unwind to caller is encountered. The MBB range is expected to be an +/// entire function or funclet, and the start and end of the range are treated +/// as being in the NullState even if there's not an unwind-to-caller call +/// before the first invoke or after the last one (i.e., the first state change +/// reported is the first change to something other than NullState, and a +/// change back to NullState is always reported at the end of iteration). 
+class InvokeStateChangeIterator {
+  InvokeStateChangeIterator(const WinEHFuncInfo &EHInfo,
+                            MachineFunction::const_iterator MFI,
+                            MachineFunction::const_iterator MFE,
+                            MachineBasicBlock::const_iterator MBBI,
+                            int BaseState)
+      : EHInfo(EHInfo), MFI(MFI), MFE(MFE), MBBI(MBBI), BaseState(BaseState) {
+    LastStateChange.PreviousEndLabel = nullptr;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    scan();
+  }
+
+public:
+  static iterator_range<InvokeStateChangeIterator>
+  range(const WinEHFuncInfo &EHInfo, MachineFunction::const_iterator Begin,
+        MachineFunction::const_iterator End, int BaseState = NullState) {
+    // Reject empty ranges to simplify bookkeeping by ensuring that we can get
+    // the end of the last block.
+    assert(Begin != End);
+    auto BlockBegin = Begin->begin();
+    auto BlockEnd = std::prev(End)->end();
+    return make_range(
+        InvokeStateChangeIterator(EHInfo, Begin, End, BlockBegin, BaseState),
+        InvokeStateChangeIterator(EHInfo, End, End, BlockEnd, BaseState));
+  }
+
+  // Iterator methods.
+  bool operator==(const InvokeStateChangeIterator &O) const {
+    assert(BaseState == O.BaseState);
+    // Must be visiting the same block.
+    if (MFI != O.MFI)
+      return false;
+    // Must be visiting the same instr.
+    if (MBBI != O.MBBI)
+      return false;
+    // At end of block/instr iteration, we can still have two distinct states:
+    // one to report the final EndLabel, and another indicating the end of the
+    // state change iteration. Check for CurrentEndLabel equality to
+    // distinguish these.
+    return CurrentEndLabel == O.CurrentEndLabel;
+  }
+
+  bool operator!=(const InvokeStateChangeIterator &O) const {
+    return !operator==(O);
+  }
+  InvokeStateChange &operator*() { return LastStateChange; }
+  InvokeStateChange *operator->() { return &LastStateChange; }
+  InvokeStateChangeIterator &operator++() { return scan(); }
+
+private:
+  InvokeStateChangeIterator &scan();
+
+  const WinEHFuncInfo &EHInfo;
+  const MCSymbol *CurrentEndLabel = nullptr;
+  MachineFunction::const_iterator MFI;
+  MachineFunction::const_iterator MFE;
+  MachineBasicBlock::const_iterator MBBI;
+  InvokeStateChange LastStateChange;
+  bool VisitingInvoke = false;
+  int BaseState;
+};
+
+} // end anonymous namespace
+
+InvokeStateChangeIterator &InvokeStateChangeIterator::scan() {
+  bool IsNewBlock = false;
+  for (; MFI != MFE; ++MFI, IsNewBlock = true) {
+    if (IsNewBlock)
+      MBBI = MFI->begin();
+    for (auto MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+      const MachineInstr &MI = *MBBI;
+      if (!VisitingInvoke && LastStateChange.NewState != BaseState &&
+          MI.isCall() && !EHStreamer::callToNoUnwindFunction(&MI)) {
+        // Indicate a change of state to the null state. We don't have
+        // start/end EH labels handy but the caller won't expect them for
+        // null state regions.
+        LastStateChange.PreviousEndLabel = CurrentEndLabel;
+        LastStateChange.NewStartLabel = nullptr;
+        LastStateChange.NewState = BaseState;
+        CurrentEndLabel = nullptr;
+        // Don't re-visit this instr on the next scan
+        ++MBBI;
+        return *this;
+      }
+
+      // All other state changes are at EH labels before/after invokes.
+      if (!MI.isEHLabel())
+        continue;
+      MCSymbol *Label = MI.getOperand(0).getMCSymbol();
+      if (Label == CurrentEndLabel) {
+        VisitingInvoke = false;
+        continue;
+      }
+      auto InvokeMapIter = EHInfo.LabelToStateMap.find(Label);
+      // Ignore EH labels that aren't the ones inserted before an invoke
+      if (InvokeMapIter == EHInfo.LabelToStateMap.end())
+        continue;
+      auto &StateAndEnd = InvokeMapIter->second;
+      int NewState = StateAndEnd.first;
+      // Keep track of the fact that we're between EH start/end labels so
+      // we know not to treat the invoke we'll see as unwinding to caller.
+      VisitingInvoke = true;
+      if (NewState == LastStateChange.NewState) {
+        // The state isn't actually changing here. Record the new end and
+        // keep going.
+        CurrentEndLabel = StateAndEnd.second;
+        continue;
+      }
+      // Found a state change to report
+      LastStateChange.PreviousEndLabel = CurrentEndLabel;
+      LastStateChange.NewStartLabel = Label;
+      LastStateChange.NewState = NewState;
+      // Start keeping track of the new current end
+      CurrentEndLabel = StateAndEnd.second;
+      // Don't re-visit this instr on the next scan
+      ++MBBI;
+      return *this;
+    }
+  }
+  // Iteration hit the end of the block range.
+  if (LastStateChange.NewState != BaseState) {
+    // Report the end of the last new state
+    LastStateChange.PreviousEndLabel = CurrentEndLabel;
+    LastStateChange.NewStartLabel = nullptr;
+    LastStateChange.NewState = BaseState;
+    // Leave CurrentEndLabel non-null to distinguish this state from end.
+    assert(CurrentEndLabel != nullptr);
+    return *this;
+  }
+  // We've reported all state changes and hit the end state.
+  CurrentEndLabel = nullptr;
+  return *this;
+}
+
+/// Emit the language-specific data that __C_specific_handler expects. This
+/// handler lives in the x64 Microsoft C runtime and allows catching or cleaning
+/// up after faults with __try, __except, and __finally. The typeinfo values
+/// are not really RTTI data, but pointers to filter functions that return an
+/// integer (1, 0, or -1) indicating how to handle the exception. For __finally
+/// blocks and other cleanups, the landing pad label is zero, and the filter
+/// function is actually a cleanup handler with the same prototype. A catch-all
+/// entry is modeled with a null filter function field and a non-zero landing
+/// pad label.
+///
+/// Possible filter function return values:
+///   EXCEPTION_EXECUTE_HANDLER (1):
+///     Jump to the landing pad label after cleanups.
+///   EXCEPTION_CONTINUE_SEARCH (0):
+///     Continue searching this table or continue unwinding.
+///   EXCEPTION_CONTINUE_EXECUTION (-1):
+///     Resume execution at the trapping PC.
+///
+/// Inferred table structure:
+///   struct Table {
+///     int NumEntries;
+///     struct Entry {
+///       imagerel32 LabelStart;
+///       imagerel32 LabelEnd;
+///       imagerel32 FilterOrFinally;  // One means catch-all.
+///       imagerel32 LabelLPad;        // Zero means __finally.
+///     } Entries[NumEntries];
+///   };
+void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
+  auto &OS = *Asm->OutStreamer;
+  MCContext &Ctx = Asm->OutContext;
+  const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+
+  bool VerboseAsm = OS.isVerboseAsm();
+  auto AddComment = [&](const Twine &Comment) {
+    if (VerboseAsm)
+      OS.AddComment(Comment);
+  };
+
+  // Emit a label assignment with the SEH frame offset so we can use it for
+  // llvm.x86.seh.recoverfp.
+ StringRef FLinkageName = + GlobalValue::getRealLinkageName(MF->getFunction()->getName()); + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + const MCExpr *MCOffset = + MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); + + // Use the assembler to compute the number of table entries through label + // difference and division. + MCSymbol *TableBegin = + Ctx.createTempSymbol("lsda_begin", /*AlwaysAddSuffix=*/true); + MCSymbol *TableEnd = + Ctx.createTempSymbol("lsda_end", /*AlwaysAddSuffix=*/true); + const MCExpr *LabelDiff = getOffset(TableEnd, TableBegin); + const MCExpr *EntrySize = MCConstantExpr::create(16, Ctx); + const MCExpr *EntryCount = MCBinaryExpr::createDiv(LabelDiff, EntrySize, Ctx); + AddComment("Number of call sites"); + OS.EmitValue(EntryCount, 4); + + OS.EmitLabel(TableBegin); + + // Iterate over all the invoke try ranges. Unlike MSVC, LLVM currently only + // models exceptions from invokes. LLVM also allows arbitrary reordering of + // the code, so our tables end up looking a bit different. Rather than + // trying to match MSVC's tables exactly, we emit a denormalized table. For + // each range of invokes in the same state, we emit table entries for all + // the actions that would be taken in that state. This means our tables are + // slightly bigger, which is OK. + const MCSymbol *LastStartLabel = nullptr; + int LastEHState = -1; + // Break out before we enter into a finally funclet. + // FIXME: We need to emit separate EH tables for cleanups. + MachineFunction::const_iterator End = MF->end(); + MachineFunction::const_iterator Stop = std::next(MF->begin()); + while (Stop != End && !Stop->isEHFuncletEntry()) + ++Stop; + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, MF->begin(), Stop)) { + // Emit all the actions for the state we just transitioned out of + // if it was not the null state + if (LastEHState != -1) + emitSEHActionsForRange(FuncInfo, LastStartLabel, + StateChange.PreviousEndLabel, LastEHState); + LastStartLabel = StateChange.NewStartLabel; + LastEHState = StateChange.NewState; + } + + OS.EmitLabel(TableEnd); +} + +void WinException::emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State) { + auto &OS = *Asm->OutStreamer; + MCContext &Ctx = Asm->OutContext; + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + assert(BeginLabel && EndLabel); + while (State != -1) { + const SEHUnwindMapEntry &UME = FuncInfo.SEHUnwindMap[State]; + const MCExpr *FilterOrFinally; + const MCExpr *ExceptOrNull; + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + if (UME.IsFinally) { + FilterOrFinally = create32bitRef(getMCSymbolForMBB(Asm, Handler)); + ExceptOrNull = MCConstantExpr::create(0, Ctx); + } else { + // For an except, the filter can be 1 (catch-all) or a function + // label. + FilterOrFinally = UME.Filter ? create32bitRef(UME.Filter) + : MCConstantExpr::create(1, Ctx); + ExceptOrNull = create32bitRef(Handler->getSymbol()); + } + + AddComment("LabelStart"); + OS.EmitValue(getLabelPlusOne(BeginLabel), 4); + AddComment("LabelEnd"); + OS.EmitValue(getLabelPlusOne(EndLabel), 4); + AddComment(UME.IsFinally ? "FinallyFunclet" : UME.Filter ? "FilterFunction" + : "CatchAll"); + OS.EmitValue(FilterOrFinally, 4); + AddComment(UME.IsFinally ? 
"Null" : "ExceptionHandler"); + OS.EmitValue(ExceptOrNull, 4); + + assert(UME.ToState < State && "states should decrease"); + State = UME.ToState; + } +} + +void WinException::emitCXXFrameHandler3Table(const MachineFunction *MF) { + const Function *F = MF->getFunction(); + auto &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + + StringRef FuncLinkageName = GlobalValue::getRealLinkageName(F->getName()); + + SmallVector<std::pair<const MCExpr *, int>, 4> IPToStateTable; + MCSymbol *FuncInfoXData = nullptr; + if (shouldEmitPersonality) { + // If we're 64-bit, emit a pointer to the C++ EH data, and build a map from + // IPs to state numbers. + FuncInfoXData = + Asm->OutContext.getOrCreateSymbol(Twine("$cppxdata$", FuncLinkageName)); + computeIP2StateTable(MF, FuncInfo, IPToStateTable); + } else { + FuncInfoXData = Asm->OutContext.getOrCreateLSDASymbol(FuncLinkageName); + } + + int UnwindHelpOffset = 0; + if (Asm->MAI->usesWindowsCFI()) + UnwindHelpOffset = + getFrameIndexOffset(FuncInfo.UnwindHelpFrameIdx, FuncInfo); + + MCSymbol *UnwindMapXData = nullptr; + MCSymbol *TryBlockMapXData = nullptr; + MCSymbol *IPToStateXData = nullptr; + if (!FuncInfo.CxxUnwindMap.empty()) + UnwindMapXData = Asm->OutContext.getOrCreateSymbol( + Twine("$stateUnwindMap$", FuncLinkageName)); + if (!FuncInfo.TryBlockMap.empty()) + TryBlockMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$tryMap$", FuncLinkageName)); + if (!IPToStateTable.empty()) + IPToStateXData = + Asm->OutContext.getOrCreateSymbol(Twine("$ip2state$", FuncLinkageName)); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + // FuncInfo { + // uint32_t MagicNumber + // int32_t MaxState; + // UnwindMapEntry *UnwindMap; + // uint32_t NumTryBlocks; + // TryBlockMapEntry *TryBlockMap; + // uint32_t IPMapEntries; // always 0 for x86 + // IPToStateMapEntry *IPToStateMap; // always 0 for x86 + // uint32_t UnwindHelp; // non-x86 only + // ESTypeList *ESTypeList; + // int32_t EHFlags; + // } + // EHFlags & 1 -> Synchronous exceptions only, no async exceptions. + // EHFlags & 2 -> ??? + // EHFlags & 4 -> The function is noexcept(true), unwinding can't continue. 
+ OS.EmitValueToAlignment(4); + OS.EmitLabel(FuncInfoXData); + + AddComment("MagicNumber"); + OS.EmitIntValue(0x19930522, 4); + + AddComment("MaxState"); + OS.EmitIntValue(FuncInfo.CxxUnwindMap.size(), 4); + + AddComment("UnwindMap"); + OS.EmitValue(create32bitRef(UnwindMapXData), 4); + + AddComment("NumTryBlocks"); + OS.EmitIntValue(FuncInfo.TryBlockMap.size(), 4); + + AddComment("TryBlockMap"); + OS.EmitValue(create32bitRef(TryBlockMapXData), 4); + + AddComment("IPMapEntries"); + OS.EmitIntValue(IPToStateTable.size(), 4); + + AddComment("IPToStateXData"); + OS.EmitValue(create32bitRef(IPToStateXData), 4); + + if (Asm->MAI->usesWindowsCFI()) { + AddComment("UnwindHelp"); + OS.EmitIntValue(UnwindHelpOffset, 4); + } + + AddComment("ESTypeList"); + OS.EmitIntValue(0, 4); + + AddComment("EHFlags"); + OS.EmitIntValue(1, 4); + + // UnwindMapEntry { + // int32_t ToState; + // void (*Action)(); + // }; + if (UnwindMapXData) { + OS.EmitLabel(UnwindMapXData); + for (const CxxUnwindMapEntry &UME : FuncInfo.CxxUnwindMap) { + MCSymbol *CleanupSym = + getMCSymbolForMBB(Asm, UME.Cleanup.dyn_cast<MachineBasicBlock *>()); + AddComment("ToState"); + OS.EmitIntValue(UME.ToState, 4); + + AddComment("Action"); + OS.EmitValue(create32bitRef(CleanupSym), 4); + } + } + + // TryBlockMap { + // int32_t TryLow; + // int32_t TryHigh; + // int32_t CatchHigh; + // int32_t NumCatches; + // HandlerType *HandlerArray; + // }; + if (TryBlockMapXData) { + OS.EmitLabel(TryBlockMapXData); + SmallVector<MCSymbol *, 1> HandlerMaps; + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + + MCSymbol *HandlerMapXData = nullptr; + if (!TBME.HandlerArray.empty()) + HandlerMapXData = + Asm->OutContext.getOrCreateSymbol(Twine("$handlerMap$") + .concat(Twine(I)) + .concat("$") + .concat(FuncLinkageName)); + HandlerMaps.push_back(HandlerMapXData); + + // TBMEs should form intervals. + assert(0 <= TBME.TryLow && "bad trymap interval"); + assert(TBME.TryLow <= TBME.TryHigh && "bad trymap interval"); + assert(TBME.TryHigh < TBME.CatchHigh && "bad trymap interval"); + assert(TBME.CatchHigh < int(FuncInfo.CxxUnwindMap.size()) && + "bad trymap interval"); + + AddComment("TryLow"); + OS.EmitIntValue(TBME.TryLow, 4); + + AddComment("TryHigh"); + OS.EmitIntValue(TBME.TryHigh, 4); + + AddComment("CatchHigh"); + OS.EmitIntValue(TBME.CatchHigh, 4); + + AddComment("NumCatches"); + OS.EmitIntValue(TBME.HandlerArray.size(), 4); + + AddComment("HandlerArray"); + OS.EmitValue(create32bitRef(HandlerMapXData), 4); + } + + // All funclets use the same parent frame offset currently. + unsigned ParentFrameOffset = 0; + if (shouldEmitPersonality) { + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + ParentFrameOffset = TFI->getWinEHParentFrameOffset(*MF); + } + + for (size_t I = 0, E = FuncInfo.TryBlockMap.size(); I != E; ++I) { + const WinEHTryBlockMapEntry &TBME = FuncInfo.TryBlockMap[I]; + MCSymbol *HandlerMapXData = HandlerMaps[I]; + if (!HandlerMapXData) + continue; + // HandlerType { + // int32_t Adjectives; + // TypeDescriptor *Type; + // int32_t CatchObjOffset; + // void (*Handler)(); + // int32_t ParentFrameOffset; // x64 only + // }; + OS.EmitLabel(HandlerMapXData); + for (const WinEHHandlerType &HT : TBME.HandlerArray) { + // Get the frame escape label with the offset of the catch object. If + // the index is INT_MAX, then there is no catch object, and we should + // emit an offset of zero, indicating that no copy will occur. 
+ const MCExpr *FrameAllocOffsetRef = nullptr; + if (HT.CatchObj.FrameIndex != INT_MAX) { + int Offset = getFrameIndexOffset(HT.CatchObj.FrameIndex, FuncInfo); + FrameAllocOffsetRef = MCConstantExpr::create(Offset, Asm->OutContext); + } else { + FrameAllocOffsetRef = MCConstantExpr::create(0, Asm->OutContext); + } + + MCSymbol *HandlerSym = + getMCSymbolForMBB(Asm, HT.Handler.dyn_cast<MachineBasicBlock *>()); + + AddComment("Adjectives"); + OS.EmitIntValue(HT.Adjectives, 4); + + AddComment("Type"); + OS.EmitValue(create32bitRef(HT.TypeDescriptor), 4); + + AddComment("CatchObjOffset"); + OS.EmitValue(FrameAllocOffsetRef, 4); + + AddComment("Handler"); + OS.EmitValue(create32bitRef(HandlerSym), 4); + + if (shouldEmitPersonality) { + AddComment("ParentFrameOffset"); + OS.EmitIntValue(ParentFrameOffset, 4); + } + } + } + } + + // IPToStateMapEntry { + // void *IP; + // int32_t State; + // }; + if (IPToStateXData) { + OS.EmitLabel(IPToStateXData); + for (auto &IPStatePair : IPToStateTable) { + AddComment("IP"); + OS.EmitValue(IPStatePair.first, 4); + AddComment("ToState"); + OS.EmitIntValue(IPStatePair.second, 4); + } + } +} + +void WinException::computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable) { + + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + // Find the end of the funclet + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + break; + } + } + + // Don't emit ip2state entries for cleanup funclets. Any interesting + // exceptional actions in cleanups must be handled in a separate IR + // function. + if (FuncletStart->isCleanupFuncletEntry()) + continue; + + MCSymbol *StartLabel; + int BaseState; + if (FuncletStart == MF->begin()) { + BaseState = NullState; + StartLabel = Asm->getFunctionBegin(); + } else { + auto *FuncletPad = + cast<FuncletPadInst>(FuncletStart->getBasicBlock()->getFirstNonPHI()); + assert(FuncInfo.FuncletBaseStateMap.count(FuncletPad) != 0); + BaseState = FuncInfo.FuncletBaseStateMap.find(FuncletPad)->second; + StartLabel = getMCSymbolForMBB(Asm, &*FuncletStart); + } + assert(StartLabel && "need local function start label"); + IPToStateTable.push_back( + std::make_pair(create32bitRef(StartLabel), BaseState)); + + for (const auto &StateChange : InvokeStateChangeIterator::range( + FuncInfo, FuncletStart, FuncletEnd, BaseState)) { + // Compute the label to report as the start of this entry; use the EH + // start label for the invoke if we have one, otherwise (this is a call + // which may unwind to our caller and does not have an EH start label, so) + // use the previous end label. + const MCSymbol *ChangeLabel = StateChange.NewStartLabel; + if (!ChangeLabel) + ChangeLabel = StateChange.PreviousEndLabel; + // Emit an entry indicating that PCs after 'Label' have this EH state. + IPToStateTable.push_back( + std::make_pair(getLabelPlusOne(ChangeLabel), StateChange.NewState)); + // FIXME: assert that NewState is between CatchLow and CatchHigh. + } + } +} + +void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, + StringRef FLinkageName) { + // Outlined helpers called by the EH runtime need to know the offset of the EH + // registration in order to recover the parent frame pointer. 
Now that we know + // we've code generated the parent, we can emit the label assignment that + // those helpers use to get the offset of the registration node. + MCContext &Ctx = Asm->OutContext; + MCSymbol *ParentFrameOffset = + Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName); + unsigned UnusedReg; + const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering(); + int64_t Offset = TFI->getFrameIndexReference( + *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg); + const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx); + Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset); +} + +/// Emit the language-specific data that _except_handler3 and 4 expect. This is +/// functionally equivalent to the __C_specific_handler table, except it is +/// indexed by state number instead of IP. +void WinException::emitExceptHandlerTable(const MachineFunction *MF) { + MCStreamer &OS = *Asm->OutStreamer; + const Function *F = MF->getFunction(); + StringRef FLinkageName = GlobalValue::getRealLinkageName(F->getName()); + + bool VerboseAsm = OS.isVerboseAsm(); + auto AddComment = [&](const Twine &Comment) { + if (VerboseAsm) + OS.AddComment(Comment); + }; + + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName); + + // Emit the __ehtable label that we use for llvm.x86.seh.lsda. + MCSymbol *LSDALabel = Asm->OutContext.getOrCreateLSDASymbol(FLinkageName); + OS.EmitValueToAlignment(4); + OS.EmitLabel(LSDALabel); + + const Function *Per = + dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts()); + StringRef PerName = Per->getName(); + int BaseState = -1; + if (PerName == "_except_handler4") { + // The LSDA for _except_handler4 starts with this struct, followed by the + // scope table: + // + // struct EH4ScopeTable { + // int32_t GSCookieOffset; + // int32_t GSCookieXOROffset; + // int32_t EHCookieOffset; + // int32_t EHCookieXOROffset; + // ScopeTableEntry ScopeRecord[]; + // }; + // + // Only the EHCookieOffset field appears to vary, and it appears to be the + // offset from the final saved SP value to the retaddr. + AddComment("GSCookieOffset"); + OS.EmitIntValue(-2, 4); + AddComment("GSCookieXOROffset"); + OS.EmitIntValue(0, 4); + // FIXME: Calculate. + AddComment("EHCookieOffset"); + OS.EmitIntValue(9999, 4); + AddComment("EHCookieXOROffset"); + OS.EmitIntValue(0, 4); + BaseState = -2; + } + + assert(!FuncInfo.SEHUnwindMap.empty()); + for (const SEHUnwindMapEntry &UME : FuncInfo.SEHUnwindMap) { + auto *Handler = UME.Handler.get<MachineBasicBlock *>(); + const MCSymbol *ExceptOrFinally = + UME.IsFinally ? getMCSymbolForMBB(Asm, Handler) : Handler->getSymbol(); + // -1 is usually the base state for "unwind to caller", but for + // _except_handler4 it's -2. Do that replacement here if necessary. + int ToState = UME.ToState == -1 ? BaseState : UME.ToState; + AddComment("ToState"); + OS.EmitIntValue(ToState, 4); + AddComment(UME.IsFinally ? "Null" : "FilterFunction"); + OS.EmitValue(create32bitRef(UME.Filter), 4); + AddComment(UME.IsFinally ? 
"FinallyFunclet" : "ExceptionHandler"); + OS.EmitValue(create32bitRef(ExceptOrFinally), 4); + } +} + +static int getTryRank(const WinEHFuncInfo &FuncInfo, int State) { + int Rank = 0; + while (State != -1) { + ++Rank; + State = FuncInfo.ClrEHUnwindMap[State].TryParentState; + } + return Rank; +} + +static int getTryAncestor(const WinEHFuncInfo &FuncInfo, int Left, int Right) { + int LeftRank = getTryRank(FuncInfo, Left); + int RightRank = getTryRank(FuncInfo, Right); + + while (LeftRank < RightRank) { + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; + --RightRank; + } + + while (RightRank < LeftRank) { + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; + --LeftRank; + } + + while (Left != Right) { + Left = FuncInfo.ClrEHUnwindMap[Left].TryParentState; + Right = FuncInfo.ClrEHUnwindMap[Right].TryParentState; + } + + return Left; +} + +void WinException::emitCLRExceptionTable(const MachineFunction *MF) { + // CLR EH "states" are really just IDs that identify handlers/funclets; + // states, handlers, and funclets all have 1:1 mappings between them, and a + // handler/funclet's "state" is its index in the ClrEHUnwindMap. + MCStreamer &OS = *Asm->OutStreamer; + const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo(); + MCSymbol *FuncBeginSym = Asm->getFunctionBegin(); + MCSymbol *FuncEndSym = Asm->getFunctionEnd(); + + // A ClrClause describes a protected region. + struct ClrClause { + const MCSymbol *StartLabel; // Start of protected region + const MCSymbol *EndLabel; // End of protected region + int State; // Index of handler protecting the protected region + int EnclosingState; // Index of funclet enclosing the protected region + }; + SmallVector<ClrClause, 8> Clauses; + + // Build a map from handler MBBs to their corresponding states (i.e. their + // indices in the ClrEHUnwindMap). + int NumStates = FuncInfo.ClrEHUnwindMap.size(); + assert(NumStates > 0 && "Don't need exception table!"); + DenseMap<const MachineBasicBlock *, int> HandlerStates; + for (int State = 0; State < NumStates; ++State) { + MachineBasicBlock *HandlerBlock = + FuncInfo.ClrEHUnwindMap[State].Handler.get<MachineBasicBlock *>(); + HandlerStates[HandlerBlock] = State; + // Use this loop through all handlers to verify our assumption (used in + // the MinEnclosingState computation) that enclosing funclets have lower + // state numbers than their enclosed funclets. + assert(FuncInfo.ClrEHUnwindMap[State].HandlerParentState < State && + "ill-formed state numbering"); + } + // Map the main function to the NullState. + HandlerStates[&MF->front()] = NullState; + + // Write out a sentinel indicating the end of the standard (Windows) xdata + // and the start of the additional (CLR) info. + OS.EmitIntValue(0xffffffff, 4); + // Write out the number of funclets + OS.EmitIntValue(NumStates, 4); + + // Walk the machine blocks/instrs, computing and emitting a few things: + // 1. Emit a list of the offsets to each handler entry, in lexical order. + // 2. Compute a map (EndSymbolMap) from each funclet to the symbol at its end. + // 3. Compute the list of ClrClauses, in the required order (inner before + // outer, earlier before later; the order by which a forward scan with + // early termination will find the innermost enclosing clause covering + // a given address). + // 4. A map (MinClauseMap) from each handler index to the index of the + // outermost funclet/function which contains a try clause targeting the + // key handler. This will be used to determine IsDuplicate-ness when + // emitting ClrClauses. 
The NullState value is used to indicate that the + // top-level function contains a try clause targeting the key handler. + // HandlerStack is a stack of (PendingStartLabel, PendingState) pairs for + // try regions we entered before entering the PendingState try but which + // we haven't yet exited. + SmallVector<std::pair<const MCSymbol *, int>, 4> HandlerStack; + // EndSymbolMap and MinClauseMap are maps described above. + std::unique_ptr<MCSymbol *[]> EndSymbolMap(new MCSymbol *[NumStates]); + SmallVector<int, 4> MinClauseMap((size_t)NumStates, NumStates); + + // Visit the root function and each funclet. + for (MachineFunction::const_iterator FuncletStart = MF->begin(), + FuncletEnd = MF->begin(), + End = MF->end(); + FuncletStart != End; FuncletStart = FuncletEnd) { + int FuncletState = HandlerStates[&*FuncletStart]; + // Find the end of the funclet + MCSymbol *EndSymbol = FuncEndSym; + while (++FuncletEnd != End) { + if (FuncletEnd->isEHFuncletEntry()) { + EndSymbol = getMCSymbolForMBB(Asm, &*FuncletEnd); + break; + } + } + // Emit the function/funclet end and, if this is a funclet (and not the + // root function), record it in the EndSymbolMap. + OS.EmitValue(getOffset(EndSymbol, FuncBeginSym), 4); + if (FuncletState != NullState) { + // Record the end of the handler. + EndSymbolMap[FuncletState] = EndSymbol; + } + + // Walk the state changes in this function/funclet and compute its clauses. + // Funclets always start in the null state. + const MCSymbol *CurrentStartLabel = nullptr; + int CurrentState = NullState; + assert(HandlerStack.empty()); + for (const auto &StateChange : + InvokeStateChangeIterator::range(FuncInfo, FuncletStart, FuncletEnd)) { + // Close any try regions we're not still under + int StillPendingState = + getTryAncestor(FuncInfo, CurrentState, StateChange.NewState); + while (CurrentState != StillPendingState) { + assert(CurrentState != NullState && + "Failed to find still-pending state!"); + // Close the pending clause + Clauses.push_back({CurrentStartLabel, StateChange.PreviousEndLabel, + CurrentState, FuncletState}); + // Now the next-outer try region is current + CurrentState = FuncInfo.ClrEHUnwindMap[CurrentState].TryParentState; + // Pop the new start label from the handler stack if we've exited all + // inner try regions of the corresponding try region. + if (HandlerStack.back().second == CurrentState) + CurrentStartLabel = HandlerStack.pop_back_val().first; + } + + if (StateChange.NewState != CurrentState) { + // For each clause we're starting, update the MinClauseMap so we can + // know which is the topmost funclet containing a clause targeting + // it. + for (int EnteredState = StateChange.NewState; + EnteredState != CurrentState; + EnteredState = + FuncInfo.ClrEHUnwindMap[EnteredState].TryParentState) { + int &MinEnclosingState = MinClauseMap[EnteredState]; + if (FuncletState < MinEnclosingState) + MinEnclosingState = FuncletState; + } + // Save the previous current start/label on the stack and update to + // the newly-current start/state. + HandlerStack.emplace_back(CurrentStartLabel, CurrentState); + CurrentStartLabel = StateChange.NewStartLabel; + CurrentState = StateChange.NewState; + } + } + assert(HandlerStack.empty()); + } + + // Now emit the clause info, starting with the number of clauses. 
+ OS.EmitIntValue(Clauses.size(), 4); + for (ClrClause &Clause : Clauses) { + // Emit a CORINFO_EH_CLAUSE : + /* + struct CORINFO_EH_CLAUSE + { + CORINFO_EH_CLAUSE_FLAGS Flags; // actually a CorExceptionFlag + DWORD TryOffset; + DWORD TryLength; // actually TryEndOffset + DWORD HandlerOffset; + DWORD HandlerLength; // actually HandlerEndOffset + union + { + DWORD ClassToken; // use for catch clauses + DWORD FilterOffset; // use for filter clauses + }; + }; + + enum CORINFO_EH_CLAUSE_FLAGS + { + CORINFO_EH_CLAUSE_NONE = 0, + CORINFO_EH_CLAUSE_FILTER = 0x0001, // This clause is for a filter + CORINFO_EH_CLAUSE_FINALLY = 0x0002, // This clause is a finally clause + CORINFO_EH_CLAUSE_FAULT = 0x0004, // This clause is a fault clause + }; + typedef enum CorExceptionFlag + { + COR_ILEXCEPTION_CLAUSE_NONE, + COR_ILEXCEPTION_CLAUSE_FILTER = 0x0001, // This is a filter clause + COR_ILEXCEPTION_CLAUSE_FINALLY = 0x0002, // This is a finally clause + COR_ILEXCEPTION_CLAUSE_FAULT = 0x0004, // This is a fault clause + COR_ILEXCEPTION_CLAUSE_DUPLICATED = 0x0008, // duplicated clause. This + // clause was duplicated + // to a funclet which was + // pulled out of line + } CorExceptionFlag; + */ + // Add 1 to the start/end of the EH clause; the IP associated with a + // call when the runtime does its scan is the IP of the next instruction + // (the one to which control will return after the call), so we need + // to add 1 to the end of the clause to cover that offset. We also add + // 1 to the start of the clause to make sure that the ranges reported + // for all clauses are disjoint. Note that we'll need some additional + // logic when machine traps are supported, since in that case the IP + // that the runtime uses is the offset of the faulting instruction + // itself; if such an instruction immediately follows a call but the + // two belong to different clauses, we'll need to insert a nop between + // them so the runtime can distinguish the point to which the call will + // return from the point at which the fault occurs. + + const MCExpr *ClauseBegin = + getOffsetPlusOne(Clause.StartLabel, FuncBeginSym); + const MCExpr *ClauseEnd = getOffsetPlusOne(Clause.EndLabel, FuncBeginSym); + + const ClrEHUnwindMapEntry &Entry = FuncInfo.ClrEHUnwindMap[Clause.State]; + MachineBasicBlock *HandlerBlock = Entry.Handler.get<MachineBasicBlock *>(); + MCSymbol *BeginSym = getMCSymbolForMBB(Asm, HandlerBlock); + const MCExpr *HandlerBegin = getOffset(BeginSym, FuncBeginSym); + MCSymbol *EndSym = EndSymbolMap[Clause.State]; + const MCExpr *HandlerEnd = getOffset(EndSym, FuncBeginSym); + + uint32_t Flags = 0; + switch (Entry.HandlerType) { + case ClrHandlerType::Catch: + // Leaving bits 0-2 clear indicates catch. + break; + case ClrHandlerType::Filter: + Flags |= 1; + break; + case ClrHandlerType::Finally: + Flags |= 2; + break; + case ClrHandlerType::Fault: + Flags |= 4; + break; + } + if (Clause.EnclosingState != MinClauseMap[Clause.State]) { + // This is a "duplicate" clause; the handler needs to be entered from a + // frame above the one holding the invoke. 
+ assert(Clause.EnclosingState > MinClauseMap[Clause.State]); + Flags |= 8; + } + OS.EmitIntValue(Flags, 4); + + // Write the clause start/end + OS.EmitValue(ClauseBegin, 4); + OS.EmitValue(ClauseEnd, 4); + + // Write out the handler start/end + OS.EmitValue(HandlerBegin, 4); + OS.EmitValue(HandlerEnd, 4); + + // Write out the type token or filter offset + assert(Entry.HandlerType != ClrHandlerType::Filter && "NYI: filters"); + OS.EmitIntValue(Entry.TypeToken, 4); + } +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h new file mode 100644 index 0000000..acb3010 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h @@ -0,0 +1,106 @@ +//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing windows exception info into asm files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H +#define LLVM_LIB_CODEGEN_ASMPRINTER_WIN64EXCEPTION_H + +#include "EHStreamer.h" + +namespace llvm { +class Function; +class GlobalValue; +class MachineFunction; +class MCExpr; +class Value; +struct WinEHFuncInfo; + +class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer { + /// Per-function flag to indicate if personality info should be emitted. + bool shouldEmitPersonality = false; + + /// Per-function flag to indicate if the LSDA should be emitted. + bool shouldEmitLSDA = false; + + /// Per-function flag to indicate if frame moves info should be emitted. + bool shouldEmitMoves = false; + + /// True if this is a 64-bit target and we should use image relative offsets. + bool useImageRel32 = false; + + /// Pointer to the current funclet entry BB. + const MachineBasicBlock *CurrentFuncletEntry = nullptr; + + void emitCSpecificHandlerTable(const MachineFunction *MF); + + void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo, + const MCSymbol *BeginLabel, + const MCSymbol *EndLabel, int State); + + /// Emit the EH table data for 32-bit and 64-bit functions using + /// the __CxxFrameHandler3 personality. + void emitCXXFrameHandler3Table(const MachineFunction *MF); + + /// Emit the EH table data for _except_handler3 and _except_handler4 + /// personality functions. These are only used on 32-bit and do not use CFI + /// tables. + void emitExceptHandlerTable(const MachineFunction *MF); + + void emitCLRExceptionTable(const MachineFunction *MF); + + void computeIP2StateTable( + const MachineFunction *MF, const WinEHFuncInfo &FuncInfo, + SmallVectorImpl<std::pair<const MCExpr *, int>> &IPToStateTable); + + /// Emits the label used with llvm.x86.seh.recoverfp, which is used by + /// outlined funclets. + void emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo, + StringRef FLinkageName); + + const MCExpr *create32bitRef(const MCSymbol *Value); + const MCExpr *create32bitRef(const GlobalValue *GV); + const MCExpr *getLabelPlusOne(const MCSymbol *Label); + const MCExpr *getOffset(const MCSymbol *OffsetOf, const MCSymbol *OffsetFrom); + const MCExpr *getOffsetPlusOne(const MCSymbol *OffsetOf, + const MCSymbol *OffsetFrom); + + /// Gets the offset that we should use in a table for a stack object with the + /// given index. 
For targets using CFI (Win64, etc), this is relative to the + /// established SP at the end of the prologue. For targets without CFI (Win32 + /// only), it is relative to the frame pointer. + int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo); + +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + WinException(AsmPrinter *A); + ~WinException() override; + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + /// \brief Emit target-specific EH funclet machinery. + void beginFunclet(const MachineBasicBlock &MBB, MCSymbol *Sym) override; + void endFunclet() override; +}; +} + +#endif + diff --git a/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp new file mode 100644 index 0000000..d12fdb2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -0,0 +1,690 @@ +//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions with +// target specific instruction which implement the same semantics in a way +// which better fits the target backend. This can include the use of either +// (intrinsic-based) load-linked/store-conditional loops, AtomicCmpXchg, or +// type coercions. 
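
To make the "cmpxchg loop" strategy concrete, here is a rough C++-level analogue (an illustrative sketch only; the pass itself performs this rewrite on IR, not on C++ source): an atomic read-modify-write re-expressed as a compare-exchange retry loop.

    #include <atomic>

    // Sketch: an atomic fetch-and-add written as the compare-exchange retry
    // loop that corresponds, at a high level, to the IR expansion used for
    // targets requesting cmpxchg-based expansion.
    int fetchAddViaCmpXchg(std::atomic<int> &Addr, int Incr) {
      int Loaded = Addr.load(std::memory_order_relaxed); // initial load
      int NewVal;
      do {
        NewVal = Loaded + Incr;                          // perform the operation
        // On failure, compare_exchange_weak refreshes Loaded with the value
        // currently in memory, so the loop retries with up-to-date data.
      } while (!Addr.compare_exchange_weak(Loaded, NewVal,
                                           std::memory_order_seq_cst,
                                           std::memory_order_relaxed));
      return Loaded;                                     // the old value
    }
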
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AtomicExpandUtils.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "atomic-expand" + +namespace { + class AtomicExpand: public FunctionPass { + const TargetMachine *TM; + const TargetLowering *TLI; + public: + static char ID; // Pass identification, replacement for typeid + explicit AtomicExpand(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM), TLI(nullptr) { + initializeAtomicExpandPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + private: + bool bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad); + IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL); + LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI); + bool tryExpandAtomicLoad(LoadInst *LI); + bool expandAtomicLoadToLL(LoadInst *LI); + bool expandAtomicLoadToCmpXchg(LoadInst *LI); + StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI); + bool expandAtomicStore(StoreInst *SI); + bool tryExpandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp); + bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI); + bool isIdempotentRMW(AtomicRMWInst *AI); + bool simplifyIdempotentRMW(AtomicRMWInst *AI); + }; +} + +char AtomicExpand::ID = 0; +char &llvm::AtomicExpandID = AtomicExpand::ID; +INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", + "Expand Atomic calls in terms of either load-linked & store-conditional or cmpxchg", + false, false) + +FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) { + return new AtomicExpand(TM); +} + +bool AtomicExpand::runOnFunction(Function &F) { + if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand()) + return false; + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. 
+ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (I->isAtomic()) + AtomicInsts.push_back(&*I); + } + + bool MadeChange = false; + for (auto I : AtomicInsts) { + auto LI = dyn_cast<LoadInst>(I); + auto SI = dyn_cast<StoreInst>(I); + auto RMWI = dyn_cast<AtomicRMWInst>(I); + auto CASI = dyn_cast<AtomicCmpXchgInst>(I); + assert((LI || SI || RMWI || CASI || isa<FenceInst>(I)) && + "Unknown atomic instruction"); + + auto FenceOrdering = Monotonic; + bool IsStore, IsLoad; + if (TLI->getInsertFencesForAtomic()) { + if (LI && isAtLeastAcquire(LI->getOrdering())) { + FenceOrdering = LI->getOrdering(); + LI->setOrdering(Monotonic); + IsStore = false; + IsLoad = true; + } else if (SI && isAtLeastRelease(SI->getOrdering())) { + FenceOrdering = SI->getOrdering(); + SI->setOrdering(Monotonic); + IsStore = true; + IsLoad = false; + } else if (RMWI && (isAtLeastRelease(RMWI->getOrdering()) || + isAtLeastAcquire(RMWI->getOrdering()))) { + FenceOrdering = RMWI->getOrdering(); + RMWI->setOrdering(Monotonic); + IsStore = IsLoad = true; + } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) && + (isAtLeastRelease(CASI->getSuccessOrdering()) || + isAtLeastAcquire(CASI->getSuccessOrdering()))) { + // If a compare and swap is lowered to LL/SC, we can do smarter fence + // insertion, with a stronger one on the success path than on the + // failure path. As a result, fence insertion is directly done by + // expandAtomicCmpXchg in that case. + FenceOrdering = CASI->getSuccessOrdering(); + CASI->setSuccessOrdering(Monotonic); + CASI->setFailureOrdering(Monotonic); + IsStore = IsLoad = true; + } + + if (FenceOrdering != Monotonic) { + MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad); + } + } + + if (LI) { + if (LI->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + LI = convertAtomicLoadToIntegerType(LI); + assert(LI->getType()->isIntegerTy() && "invariant broken"); + MadeChange = true; + } + + MadeChange |= tryExpandAtomicLoad(LI); + } else if (SI) { + if (SI->getValueOperand()->getType()->isFloatingPointTy()) { + // TODO: add a TLI hook to control this so that each target can + // convert to lowering the original type one at a time. + SI = convertAtomicStoreToIntegerType(SI); + assert(SI->getValueOperand()->getType()->isIntegerTy() && + "invariant broken"); + MadeChange = true; + } + + if (TLI->shouldExpandAtomicStoreInIR(SI)) + MadeChange |= expandAtomicStore(SI); + } else if (RMWI) { + // There are two different ways of expanding RMW instructions: + // - into a load if it is idempotent + // - into a Cmpxchg/LL-SC loop otherwise + // we try them in that order. + + if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) { + MadeChange = true; + } else { + MadeChange |= tryExpandAtomicRMW(RMWI); + } + } else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI)) { + MadeChange |= expandAtomicCmpXchg(CASI); + } + } + return MadeChange; +} + +bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order, + bool IsStore, bool IsLoad) { + IRBuilder<> Builder(I); + + auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad); + + auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad); + // The trailing fence is emitted before the instruction instead of after + // because there is no easy way of setting Builder insertion point after + // an instruction. 
So we must erase it from the BB, and insert it back + // in the right place. + // We have a guard here because not every atomic operation generates a + // trailing fence. + if (TrailingFence) { + TrailingFence->removeFromParent(); + TrailingFence->insertAfter(I); + } + + return (LeadingFence || TrailingFence); +} + +/// Get the iX type with the same bitwidth as T. +IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T, + const DataLayout &DL) { + EVT VT = TLI->getValueType(DL, T); + unsigned BitWidth = VT.getStoreSizeInBits(); + assert(BitWidth == VT.getSizeInBits() && "must be a power of two"); + return IntegerType::get(T->getContext(), BitWidth); +} + +/// Convert an atomic load of a non-integral type to an integer load of the +/// equivelent bitwidth. See the function comment on +/// convertAtomicStoreToIntegerType for background. +LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) { + auto *M = LI->getModule(); + Type *NewTy = getCorrespondingIntegerType(LI->getType(), + M->getDataLayout()); + + IRBuilder<> Builder(LI); + + Value *Addr = LI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + auto *NewLI = Builder.CreateLoad(NewAddr); + NewLI->setAlignment(LI->getAlignment()); + NewLI->setVolatile(LI->isVolatile()); + NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n"); + + Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType()); + LI->replaceAllUsesWith(NewVal); + LI->eraseFromParent(); + return NewLI; +} + +bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) { + switch (TLI->shouldExpandAtomicLoadInIR(LI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC( + LI, LI->getPointerOperand(), LI->getOrdering(), + [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; }); + case TargetLoweringBase::AtomicExpansionKind::LLOnly: + return expandAtomicLoadToLL(LI); + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: + return expandAtomicLoadToCmpXchg(LI); + } + llvm_unreachable("Unhandled case in tryExpandAtomicLoad"); +} + +bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) { + IRBuilder<> Builder(LI); + + // On some architectures, load-linked instructions are atomic for larger + // sizes than normal loads. For example, the only 64-bit load guaranteed + // to be single-copy atomic by ARM is an ldrexd (A3.5.3). + Value *Val = + TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering()); + TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + + LI->replaceAllUsesWith(Val); + LI->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) { + IRBuilder<> Builder(LI); + AtomicOrdering Order = LI->getOrdering(); + Value *Addr = LI->getPointerOperand(); + Type *Ty = cast<PointerType>(Addr->getType())->getElementType(); + Constant *DummyVal = Constant::getNullValue(Ty); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, DummyVal, DummyVal, Order, + AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); + Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded"); + + LI->replaceAllUsesWith(Loaded); + LI->eraseFromParent(); + + return true; +} + +/// Convert an atomic store of a non-integral type to an integer store of the +/// equivelent bitwidth. 
We used to not support floating point or vector +/// atomics in the IR at all. The backends learned to deal with the bitcast +/// idiom because that was the only way of expressing the notion of a atomic +/// float or vector store. The long term plan is to teach each backend to +/// instruction select from the original atomic store, but as a migration +/// mechanism, we convert back to the old format which the backends understand. +/// Each backend will need individual work to recognize the new format. +StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) { + IRBuilder<> Builder(SI); + auto *M = SI->getModule(); + Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(), + M->getDataLayout()); + Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy); + + Value *Addr = SI->getPointerOperand(); + Type *PT = PointerType::get(NewTy, + Addr->getType()->getPointerAddressSpace()); + Value *NewAddr = Builder.CreateBitCast(Addr, PT); + + StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr); + NewSI->setAlignment(SI->getAlignment()); + NewSI->setVolatile(SI->isVolatile()); + NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope()); + DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n"); + SI->eraseFromParent(); + return NewSI; +} + +bool AtomicExpand::expandAtomicStore(StoreInst *SI) { + // This function is only called on atomic stores that are too large to be + // atomic if implemented as a native store. So we replace them by an + // atomic swap, that can be implemented for example as a ldrex/strex on ARM + // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes. + // It is the responsibility of the target to only signal expansion via + // shouldExpandAtomicRMW in cases where this is required and possible. + IRBuilder<> Builder(SI); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), SI->getOrdering()); + SI->eraseFromParent(); + + // Now we have an appropriate swap instruction, lower it as usual. + return tryExpandAtomicRMW(AI); +} + +static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr, + Value *Loaded, Value *NewVal, + AtomicOrdering MemOpOrder, + Value *&Success, Value *&NewLoaded) { + Value* Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, MemOpOrder, + AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder)); + Success = Builder.CreateExtractValue(Pair, 1, "success"); + NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); +} + +/// Emit IR to implement the given atomicrmw operation on values in registers, +/// returning the new value. 
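
Since min/max and nand have no single IR arithmetic instruction, the routine below lowers them to a compare feeding a select (or a not of an and). As a rough sketch, the scalar computations it builds correspond to the following plain C++:

    // Sketch of the scalar math produced for two of the atomicrmw flavours
    // (signed max and nand); the routine below emits the equivalent IR.
    int rmwSignedMax(int Loaded, int Inc) {
      bool KeepLoaded = Loaded > Inc;   // ICmpSGT
      return KeepLoaded ? Loaded : Inc; // select
    }
    int rmwNand(int Loaded, int Inc) {
      return ~(Loaded & Inc);           // not(and)
    }
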
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, + Value *Loaded, Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + default: + llvm_unreachable("Unknown atomic op"); + } +} + +bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) { + switch (TLI->shouldExpandAtomicRMWInIR(AI)) { + case TargetLoweringBase::AtomicExpansionKind::None: + return false; + case TargetLoweringBase::AtomicExpansionKind::LLSC: + return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(), + [&](IRBuilder<> &Builder, Value *Loaded) { + return performAtomicOp(AI->getOperation(), + Builder, Loaded, + AI->getValOperand()); + }); + case TargetLoweringBase::AtomicExpansionKind::CmpXChg: + return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun); + default: + llvm_unreachable("Unhandled case in tryExpandAtomicRMW"); + } +} + +bool AtomicExpand::expandAtomicOpToLLSC( + Instruction *I, Value *Addr, AtomicOrdering MemOpOrder, + std::function<Value *(IRBuilder<> &, Value *)> PerformOp) { + BasicBlock *BB = I->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // fence? + // atomicrmw.start: + // %loaded = @load.linked(%addr) + // %new = some_op iN %loaded, %incr + // %stored = @store_conditional(%new, %addr) + // %try_again = icmp i32 ne %stored, 0 + // br i1 %try_again, label %loop, label %atomicrmw.end + // atomicrmw.end: + // fence? + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from I. + IRBuilder<> Builder(I); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
+ Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + + Value *NewVal = PerformOp(Builder, Loaded); + + Value *StoreSuccess = + TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); + Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + I->replaceAllUsesWith(Loaded); + I->eraseFromParent(); + + return true; +} + +bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { + AtomicOrdering SuccessOrder = CI->getSuccessOrdering(); + AtomicOrdering FailureOrder = CI->getFailureOrdering(); + Value *Addr = CI->getPointerOperand(); + BasicBlock *BB = CI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + // If getInsertFencesForAtomic() returns true, then the target does not want + // to deal with memory orders, and emitLeading/TrailingFence should take care + // of everything. Otherwise, emitLeading/TrailingFence are no-op and we + // should preserve the ordering. + AtomicOrdering MemOpOrder = + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder; + + // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord + // + // The full expansion we produce is: + // [...] + // fence? + // cmpxchg.start: + // %loaded = @load.linked(%addr) + // %should_store = icmp eq %loaded, %desired + // br i1 %should_store, label %cmpxchg.trystore, + // label %cmpxchg.nostore + // cmpxchg.trystore: + // %stored = @store_conditional(%new, %addr) + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? + // br label %cmpxchg.end + // cmpxchg.nostore: + // @load_linked_fail_balance()? + // br label %cmpxchg.failure + // cmpxchg.failure: + // fence? + // br label %cmpxchg.end + // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end"); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); + auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); + + // This grabs the DebugLoc from CI + IRBuilder<> Builder(CI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we might want a fence too. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *ShouldStore = + Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); + + // If the cmpxchg doesn't actually need any ordering when it fails, we can + // jump straight past that fence instruction (if it exists). 
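
The distinction matters because the success and failure orderings of a cmpxchg may differ; a rough C++-level analogue of what the expanded control flow computes (illustrative only, with arbitrarily chosen orderings) is:

    #include <atomic>

    // Sketch: on success the exchange is published with the stronger success
    // ordering; on failure only a load was observed, so the weaker failure
    // ordering suffices and the success-path fence can be bypassed.
    bool cmpXchgAnalogue(std::atomic<int> &Addr, int &Expected, int Desired) {
      return Addr.compare_exchange_strong(Expected, Desired,
                                          std::memory_order_acq_rel,  // success
                                          std::memory_order_acquire); // failure
    }
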
+ Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB); + + Builder.SetInsertPoint(TryStoreBB); + Value *StoreSuccess = TLI->emitStoreConditional( + Builder, CI->getNewValOperand(), Addr, MemOpOrder); + StoreSuccess = Builder.CreateICmpEQ( + StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); + + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); + TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(ExitBB); + + Builder.SetInsertPoint(NoStoreBB); + // In the failing case, where we don't execute the store-conditional, the + // target might want to balance out the load-linked with a dedicated + // instruction (e.g., on ARM, clearing the exclusive monitor). + TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder); + Builder.CreateBr(FailureBB); + + Builder.SetInsertPoint(FailureBB); + TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true, + /*IsLoad=*/true); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. + Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); + return true; +} + +bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) { + auto C = dyn_cast<ConstantInt>(RMWI->getValOperand()); + if(!C) + return false; + + AtomicRMWInst::BinOp Op = RMWI->getOperation(); + switch(Op) { + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + case AtomicRMWInst::Or: + case AtomicRMWInst::Xor: + return C->isZero(); + case AtomicRMWInst::And: + return C->isMinusOne(); + // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/... 
+ default: + return false; + } +} + +bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) { + if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) { + tryExpandAtomicLoad(ResultingLoad); + return true; + } + return false; +} + +bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, + CreateCmpXchgInstFun CreateCmpXchg) { + assert(AI); + + AtomicOrdering MemOpOrder = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + // Atomics require at least natural alignment. + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. + Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *NewLoaded = nullptr; + Value *Success = nullptr; + + CreateCmpXchg(Builder, Addr, Loaded, NewVal, MemOpOrder, + Success, NewLoaded); + assert(Success && NewLoaded); + + Loaded->addIncoming(NewLoaded, LoopBB); + + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + + AI->replaceAllUsesWith(NewLoaded); + AI->eraseFromParent(); + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp new file mode 100644 index 0000000..a67e194 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -0,0 +1,38 @@ +//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the implementation of a basic TargetTransformInfo pass +/// predicated on the target abstractions present in the target independent +/// code generator. It uses these (primarily TargetLowering) to model as much +/// of the TTI query interface as possible. It is included by most targets so +/// that they can specialize only a small subset of the query space. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TargetTransformInfoImpl.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include <utility> +using namespace llvm; + +#define DEBUG_TYPE "basictti" + +// This flag is used by the template base class for BasicTTIImpl, and here to +// provide a definition. +cl::opt<unsigned> + llvm::PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), + cl::desc("Threshold for partial unrolling"), + cl::Hidden); + +BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F) + : BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)), + TLI(ST->getTargetLowering()) {} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp new file mode 100644 index 0000000..604feed --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -0,0 +1,1888 @@ +//===-- BranchFolding.cpp - Fold machine code branch instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass forwards branches to unconditional branches to make them branch +// directly to the target block. This pass often results in dead MBB's, which +// it then removes. +// +// Note that this pass must be run after register allocation, it cannot handle +// SSA form. It also must handle virtual registers for targets that emit virtual +// ISA (e.g. NVPTX). +// +//===----------------------------------------------------------------------===// + +#include "BranchFolding.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "branchfolding" + +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumBranchOpts, "Number of branches optimized"); +STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumHoist , "Number of times common instructions are hoisted"); + +static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", + cl::init(cl::BOU_UNSET), cl::Hidden); + +// Throttle for huge numbers of predecessors (compile speed problems) +static cl::opt<unsigned> +TailMergeThreshold("tail-merge-threshold", + cl::desc("Max number of predecessors to consider tail merging"), + cl::init(150), cl::Hidden); + +// Heuristic for tail merging (and, inversely, tail duplication). 
+// TODO: This should be replaced with a target query. +static cl::opt<unsigned> +TailMergeSize("tail-merge-size", + cl::desc("Min number of instructions to consider tail merging"), + cl::init(3), cl::Hidden); + +namespace { + /// BranchFolderPass - Wrap branch folder in a machine function pass. + class BranchFolderPass : public MachineFunctionPass { + public: + static char ID; + explicit BranchFolderPass(): MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} + +char BranchFolderPass::ID = 0; +char &llvm::BranchFolderPassID = BranchFolderPass::ID; + +INITIALIZE_PASS(BranchFolderPass, "branch-folder", + "Control Flow Optimizer", false, false) + +bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + // TailMerge can create jump into if branches that make CFG irreducible for + // HW that requires structurized CFG. + bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() && + PassConfig->getEnableTailMerge(); + BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, + getAnalysis<MachineBlockFrequencyInfo>(), + getAnalysis<MachineBranchProbabilityInfo>()); + return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(), + MF.getSubtarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); +} + +BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &FreqInfo, + const MachineBranchProbabilityInfo &ProbInfo) + : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo), + MBPI(ProbInfo) { + switch (FlagEnableTailMerge) { + case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break; + case cl::BOU_TRUE: EnableTailMerge = true; break; + case cl::BOU_FALSE: EnableTailMerge = false; break; + } +} + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + + MachineFunction *MF = MBB->getParent(); + // drop all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // Avoid matching if this pointer gets reused. + TriedMerging.erase(MBB); + + // Remove the block. + MF->erase(MBB); + FuncletMembership.erase(MBB); +} + +/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def +/// followed by terminators, and if the implicitly defined registers are not +/// used by the terminators, remove those implicit_def's. e.g. +/// BB1: +/// r0 = implicit_def +/// r1 = implicit_def +/// br +/// This block can be optimized away later if the implicit instructions are +/// removed. 
+bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { + SmallSet<unsigned, 4> ImpDefRegs; + MachineBasicBlock::iterator I = MBB->begin(); + while (I != MBB->end()) { + if (!I->isImplicitDef()) + break; + unsigned Reg = I->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ImpDefRegs.insert(*SubRegs); + } else { + ImpDefRegs.insert(Reg); + } + ++I; + } + if (ImpDefRegs.empty()) + return false; + + MachineBasicBlock::iterator FirstTerm = I; + while (I != MBB->end()) { + if (!TII->isUnpredicatedTerminator(I)) + return false; + // See if it uses any of the implicitly defined registers. + for (const MachineOperand &MO : I->operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (ImpDefRegs.count(Reg)) + return false; + } + ++I; + } + + I = MBB->begin(); + while (I != FirstTerm) { + MachineInstr *ImpDefMI = &*I; + ++I; + MBB->erase(ImpDefMI); + } + + return true; +} + +/// OptimizeFunction - Perhaps branch folding, tail merging and other +/// CFG optimizations on the given function. +bool BranchFolder::OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi) { + if (!tii) return false; + + TriedMerging.clear(); + + TII = tii; + TRI = tri; + MMI = mmi; + RS = nullptr; + + // Use a RegScavenger to help update liveness when required. + MachineRegisterInfo &MRI = MF.getRegInfo(); + if (MRI.tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS = new RegScavenger(); + else + MRI.invalidateLiveness(); + + // Fix CFG. The later algorithms expect it to be right. + bool MadeChange = false; + for (MachineBasicBlock &MBB : MF) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(MBB, TBB, FBB, Cond, true)) + MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); + MadeChange |= OptimizeImpDefsBlock(&MBB); + } + + // Recalculate funclet membership. + FuncletMembership = getFuncletMembership(MF); + + bool MadeChangeThisIteration = true; + while (MadeChangeThisIteration) { + MadeChangeThisIteration = TailMergeBlocks(MF); + MadeChangeThisIteration |= OptimizeBranches(MF); + if (EnableHoistCommonCode) + MadeChangeThisIteration |= HoistCommonCode(MF); + MadeChange |= MadeChangeThisIteration; + } + + // See if any jump tables have become dead as the code generator + // did its thing. + MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); + if (!JTI) { + delete RS; + return MadeChange; + } + + // Walk the function to find jump tables that are live. + BitVector JTIsLive(JTI->getJumpTables().size()); + for (const MachineBasicBlock &BB : MF) { + for (const MachineInstr &I : BB) + for (const MachineOperand &Op : I.operands()) { + if (!Op.isJTI()) continue; + + // Remember that this JT is live. + JTIsLive.set(Op.getIndex()); + } + } + + // Finally, remove dead jump tables. This happens when the + // indirect jump was unreachable (and thus deleted). + for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) + if (!JTIsLive.test(i)) { + JTI->RemoveJumpTable(i); + MadeChange = true; + } + + delete RS; + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Tail Merging of Blocks +//===----------------------------------------------------------------------===// + +/// HashMachineInstr - Compute a hash value for MI and its operands. 
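
The combining scheme used by HashMachineInstr below can be sketched in isolation; determinism is the key property, since blocks are later sorted by this value. A hypothetical standalone helper, for illustration only:

    #include <utility>
    #include <vector>

    // Sketch: the same deterministic mixing, applied to plain
    // (operand-kind, operand-hash) pairs instead of MachineOperands.
    unsigned hashOperandList(unsigned Opcode,
                             const std::vector<std::pair<unsigned, unsigned>> &Ops) {
      unsigned Hash = Opcode;
      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
        unsigned Kind = Ops[i].first;         // stands in for Op.getType()
        unsigned OperandHash = Ops[i].second; // stands in for the per-kind value
        Hash += ((OperandHash << 3) | Kind) << (i & 31);
      }
      return Hash;
    }
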
+static unsigned HashMachineInstr(const MachineInstr *MI) { + unsigned Hash = MI->getOpcode(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &Op = MI->getOperand(i); + + // Merge in bits from the operand if easy. We can't use MachineOperand's + // hash_code here because it's not deterministic and we sort by hash value + // later. + unsigned OperandHash = 0; + switch (Op.getType()) { + case MachineOperand::MO_Register: + OperandHash = Op.getReg(); + break; + case MachineOperand::MO_Immediate: + OperandHash = Op.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + OperandHash = Op.getMBB()->getNumber(); + break; + case MachineOperand::MO_FrameIndex: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_JumpTableIndex: + OperandHash = Op.getIndex(); + break; + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + // Global address / external symbol are too hard, don't bother, but do + // pull in the offset. + OperandHash = Op.getOffset(); + break; + default: + break; + } + + Hash += ((OperandHash << 3) | Op.getType()) << (i & 31); + } + return Hash; +} + +/// HashEndOfMBB - Hash the last instruction in the MBB. +static unsigned HashEndOfMBB(const MachineBasicBlock *MBB) { + MachineBasicBlock::const_iterator I = MBB->getLastNonDebugInstr(); + if (I == MBB->end()) + return 0; + + return HashMachineInstr(I); +} + +/// ComputeCommonTailLength - Given two machine basic blocks, compute the number +/// of instructions they actually have in common together at their end. Return +/// iterators for the first shared instruction in each block. +static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2) { + I1 = MBB1->end(); + I2 = MBB2->end(); + + unsigned TailLen = 0; + while (I1 != MBB1->begin() && I2 != MBB2->begin()) { + --I1; --I2; + // Skip debugging pseudos; necessary to avoid changing the code. + while (I1->isDebugValue()) { + if (I1==MBB1->begin()) { + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) + // I1==DBG at begin; I2==DBG at begin + return TailLen; + --I2; + } + ++I2; + // I1==DBG at begin; I2==non-DBG, or first of DBGs not at begin + return TailLen; + } + --I1; + } + // I1==first (untested) non-DBG preceding known match + while (I2->isDebugValue()) { + if (I2==MBB2->begin()) { + ++I1; + // I1==non-DBG, or first of DBGs not at begin; I2==DBG at begin + return TailLen; + } + --I2; + } + // I1, I2==first (untested) non-DBGs preceding known match + if (!I1->isIdenticalTo(I2) || + // FIXME: This check is dubious. It's used to get around a problem where + // people incorrectly expect inline asm directives to remain in the same + // relative order. This is untenable because normal compiler + // optimizations (like this one) may reorder and/or merge these + // directives. + I1->isInlineAsm()) { + ++I1; ++I2; + break; + } + ++TailLen; + } + // Back past possible debugging pseudos at beginning of block. This matters + // when one block differs from the other only by whether debugging pseudos + // are present at the beginning. (This way, the various checks later for + // I1==MBB1->begin() work as expected.) 
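
Stripped of the debug-value and inline-asm special cases handled above, the core of this routine is a backward walk counting identical trailing instructions. A minimal standalone analogue (sketch only):

    #include <vector>

    // Sketch: length of the common suffix of two sequences, the quantity the
    // surrounding routine computes over machine instructions.
    unsigned commonSuffixLength(const std::vector<int> &A,
                                const std::vector<int> &B) {
      unsigned Len = 0;
      auto I = A.rbegin(), J = B.rbegin();
      while (I != A.rend() && J != B.rend() && *I == *J) {
        ++Len;
        ++I;
        ++J;
      }
      return Len;
    }
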
+ if (I1 == MBB1->begin() && I2 != MBB2->begin()) { + --I2; + while (I2->isDebugValue()) { + if (I2 == MBB2->begin()) + return TailLen; + --I2; + } + ++I2; + } + if (I2 == MBB2->begin() && I1 != MBB1->begin()) { + --I1; + while (I1->isDebugValue()) { + if (I1 == MBB1->begin()) + return TailLen; + --I1; + } + ++I1; + } + return TailLen; +} + +void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB) { + if (RS) { + RS->enterBasicBlock(CurMBB); + if (!CurMBB->empty()) + RS->forward(std::prev(CurMBB->end())); + for (unsigned int i = 1, e = TRI->getNumRegs(); i != e; i++) + if (RS->isRegUsed(i, false)) + NewMBB->addLiveIn(i); + } +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest) { + MachineBasicBlock *CurMBB = OldInst->getParent(); + + TII->ReplaceTailWithBranchTo(OldInst, NewDest); + + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(CurMBB, NewDest); + + ++NumTailMerge; +} + +/// SplitMBBAt - Given a machine basic block and an iterator into it, split the +/// MBB so that the part before the iterator falls into the part starting at the +/// iterator. This returns the new MBB. +MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return nullptr; + + MachineFunction &MF = *CurMBB.getParent(); + + // Create the fall-through block. + MachineFunction::iterator MBBI = CurMBB.getIterator(); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); + CurMBB.getParent()->insert(++MBBI, NewMBB); + + // Move all the successors of this block to the specified block. + NewMBB->transferSuccessors(&CurMBB); + + // Add an edge from CurMBB to NewMBB for the fall-through. + CurMBB.addSuccessor(NewMBB); + + // Splice the code over. + NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end()); + + // NewMBB inherits CurMBB's block frequency. + MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB)); + + // For targets that use the register scavenger, we must maintain LiveIns. + MaintainLiveIns(&CurMBB, NewMBB); + + // Add the new block to the funclet. + const auto &FuncletI = FuncletMembership.find(&CurMBB); + if (FuncletI != FuncletMembership.end()) + FuncletMembership[NewMBB] = FuncletI->second; + + return NewMBB; +} + +/// EstimateRuntime - Make a rough estimate for how long it will take to run +/// the specified code. +static unsigned EstimateRuntime(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + unsigned Time = 0; + for (; I != E; ++I) { + if (I->isDebugValue()) + continue; + if (I->isCall()) + Time += 10; + else if (I->mayLoad() || I->mayStore()) + Time += 2; + else + ++Time; + } + return Time; +} + +// CurMBB needs to add an unconditional branch to SuccMBB (we removed these +// branches temporarily for tail merging). In the case where CurMBB ends +// with a conditional branch to the next block, optimize by reversing the +// test and conditionally branching to SuccMBB instead. 
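
In the same notation used later in this file (generic Bcc/Bncc mnemonics, for example only), the reversal described above turns

    Bcc  NextBB      ; conditional branch to the fall-through block
    B    SuccBB      ; unconditional branch that would otherwise be re-added

into

    Bncc SuccBB      ; condition reversed, branch retargeted at SuccBB
                     ; fall through to NextBB

so no extra unconditional branch is needed.
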
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, + const TargetInstrInfo *TII) { + MachineFunction *MF = CurMBB->getParent(); + MachineFunction::iterator I = std::next(MachineFunction::iterator(CurMBB)); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + DebugLoc dl; // FIXME: this is nowhere + if (I != MF->end() && + !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + MachineBasicBlock *NextBB = &*I; + if (TBB == NextBB && !Cond.empty() && !FBB) { + if (!TII->ReverseBranchCondition(Cond)) { + TII->RemoveBranch(*CurMBB); + TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl); + return; + } + } + } + TII->InsertBranch(*CurMBB, SuccBB, nullptr, + SmallVector<MachineOperand, 0>(), dl); +} + +bool +BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const { + if (getHash() < o.getHash()) + return true; + if (getHash() > o.getHash()) + return false; + if (getBlock()->getNumber() < o.getBlock()->getNumber()) + return true; + if (getBlock()->getNumber() > o.getBlock()->getNumber()) + return false; + // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing + // an object with itself. +#ifndef _GLIBCXX_DEBUG + llvm_unreachable("Predecessor appears twice"); +#else + return false; +#endif +} + +BlockFrequency +BranchFolder::MBFIWrapper::getBlockFreq(const MachineBasicBlock *MBB) const { + auto I = MergedBBFreq.find(MBB); + + if (I != MergedBBFreq.end()) + return I->second; + + return MBFI.getBlockFreq(MBB); +} + +void BranchFolder::MBFIWrapper::setBlockFreq(const MachineBasicBlock *MBB, + BlockFrequency F) { + MergedBBFreq[MBB] = F; +} + +/// CountTerminators - Count the number of terminators in the given +/// block and set I to the position of the first non-terminator, if there +/// is one, or MBB->end() otherwise. +static unsigned CountTerminators(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &I) { + I = MBB->end(); + unsigned NumTerms = 0; + for (;;) { + if (I == MBB->begin()) { + I = MBB->end(); + break; + } + --I; + if (!I->isTerminator()) break; + ++NumTerms; + } + return NumTerms; +} + +/// ProfitableToMerge - Check if two machine basic blocks have a common tail +/// and decide if it would be profitable to merge those tails. Return the +/// length of the common tail and iterators to the first common instruction +/// in each block. +static bool +ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2, + unsigned minCommonTailLength, unsigned &CommonTailLen, + MachineBasicBlock::iterator &I1, + MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB, + DenseMap<const MachineBasicBlock *, int> &FuncletMembership) { + // It is never profitable to tail-merge blocks from two different funclets. + if (!FuncletMembership.empty()) { + auto Funclet1 = FuncletMembership.find(MBB1); + assert(Funclet1 != FuncletMembership.end()); + auto Funclet2 = FuncletMembership.find(MBB2); + assert(Funclet2 != FuncletMembership.end()); + if (Funclet1->second != Funclet2->second) + return false; + } + + CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); + if (CommonTailLen == 0) + return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); + + // It's almost always profitable to merge any number of non-terminator + // instructions with the block that falls through into the common successor. 
+ if (MBB1 == PredBB || MBB2 == PredBB) { + MachineBasicBlock::iterator I; + unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I); + if (CommonTailLen > NumTerms) + return true; + } + + // If one of the blocks can be completely merged and happens to be in + // a position where the other could fall through into it, merge any number + // of instructions, because it can be done without a branch. + // TODO: If the blocks are not adjacent, move one of them so that they are? + if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin()) + return true; + if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin()) + return true; + + // If both blocks have an unconditional branch temporarily stripped out, + // count that as an additional common instruction for the following + // heuristics. + unsigned EffectiveTailLen = CommonTailLen; + if (SuccBB && MBB1 != PredBB && MBB2 != PredBB && + !MBB1->back().isBarrier() && + !MBB2->back().isBarrier()) + ++EffectiveTailLen; + + // Check if the common tail is long enough to be worthwhile. + if (EffectiveTailLen >= minCommonTailLength) + return true; + + // If we are optimizing for code size, 2 instructions in common is enough if + // we don't have to split a block. At worst we will be introducing 1 new + // branch instruction, which is likely to be smaller than the 2 + // instructions that would be deleted in the merge. + MachineFunction *MF = MBB1->getParent(); + return EffectiveTailLen >= 2 && MF->getFunction()->optForSize() && + (I1 == MBB1->begin() || I2 == MBB2->begin()); +} + +/// ComputeSameTails - Look through all the blocks in MergePotentials that have +/// hash CurHash (guaranteed to match the last element). Build the vector +/// SameTails of all those that have the (same) largest number of instructions +/// in common of any pair of these blocks. SameTails entries contain an +/// iterator into MergePotentials (from which the MachineBasicBlock can be +/// found) and a MachineBasicBlock::iterator into that MBB indicating the +/// instruction where the matching code sequence begins. +/// Order of elements in SameTails is the reverse of the order in which +/// those blocks appear in MergePotentials (where they are not necessarily +/// consecutive). +unsigned BranchFolder::ComputeSameTails(unsigned CurHash, + unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + unsigned maxCommonTailLength = 0U; + SameTails.clear(); + MachineBasicBlock::iterator TrialBBI1, TrialBBI2; + MPIterator HighestMPIter = std::prev(MergePotentials.end()); + for (MPIterator CurMPIter = std::prev(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter != B && CurMPIter->getHash() == CurHash; --CurMPIter) { + for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) { + unsigned CommonTailLen; + if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(), + minCommonTailLength, + CommonTailLen, TrialBBI1, TrialBBI2, + SuccBB, PredBB, + FuncletMembership)) { + if (CommonTailLen > maxCommonTailLength) { + SameTails.clear(); + maxCommonTailLength = CommonTailLen; + HighestMPIter = CurMPIter; + SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1)); + } + if (HighestMPIter == CurMPIter && + CommonTailLen == maxCommonTailLength) + SameTails.push_back(SameTailElt(I, TrialBBI2)); + } + if (I == B) + break; + } + } + return maxCommonTailLength; +} + +/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from +/// MergePotentials, restoring branches at ends of blocks as appropriate. 
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + MPIterator CurMPIter, B; + for (CurMPIter = std::prev(MergePotentials.end()), + B = MergePotentials.begin(); + CurMPIter->getHash() == CurHash; --CurMPIter) { + // Put the unconditional branch back, if we need one. + MachineBasicBlock *CurMBB = CurMPIter->getBlock(); + if (SuccBB && CurMBB != PredBB) + FixTail(CurMBB, SuccBB, TII); + if (CurMPIter == B) + break; + } + if (CurMPIter->getHash() != CurHash) + CurMPIter++; + MergePotentials.erase(CurMPIter, MergePotentials.end()); +} + +/// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist +/// only of the common tail. Create a block that does by splitting one. +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; + unsigned TimeEstimate = ~0U; + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + // Use PredBB if possible; that doesn't require a new branch. + if (SameTails[i].getBlock() == PredBB) { + commonTailIndex = i; + break; + } + // Otherwise, make a (fairly bogus) choice based on estimate of + // how long it will take the various blocks to execute. + unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(), + SameTails[i].getTailStartPos()); + if (t <= TimeEstimate) { + TimeEstimate = t; + commonTailIndex = i; + } + } + + MachineBasicBlock::iterator BBI = + SameTails[commonTailIndex].getTailStartPos(); + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // If the common tail includes any debug info we will take it pretty + // randomly from one of the inputs. Might be better to remove it? + DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " + << maxCommonTailLength); + + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + + SameTails[commonTailIndex].setBlock(newMBB); + SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); + + // If we split PredBB, newMBB is the new predecessor. + if (PredBB == MBB) + PredBB = newMBB; + + return true; +} + +static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) { + auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end(); + auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end(); + if ((E1 - I1) != (E2 - I2)) + return false; + for (; I1 != E1; ++I1, ++I2) { + if (**I1 != **I2) + return false; + } + return true; +} + +static void +removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos, + MachineBasicBlock &MBBCommon) { + // Remove MMOs from memory operations in the common block + // when they do not match the ones from the block being tail-merged. + // This ensures later passes conservatively compute dependencies. + MachineBasicBlock *MBB = MBBIStartPos->getParent(); + // Note CommonTailLen does not necessarily matches the size of + // the common BB nor all its instructions because of debug + // instructions differences. 
+ unsigned CommonTailLen = 0; + for (auto E = MBB->end(); MBBIStartPos != E; ++MBBIStartPos) + ++CommonTailLen; + + MachineBasicBlock::reverse_iterator MBBI = MBB->rbegin(); + MachineBasicBlock::reverse_iterator MBBIE = MBB->rend(); + MachineBasicBlock::reverse_iterator MBBICommon = MBBCommon.rbegin(); + MachineBasicBlock::reverse_iterator MBBIECommon = MBBCommon.rend(); + + while (CommonTailLen--) { + assert(MBBI != MBBIE && "Reached BB end within common tail length!"); + (void)MBBIE; + + if (MBBI->isDebugValue()) { + ++MBBI; + continue; + } + + while ((MBBICommon != MBBIECommon) && MBBICommon->isDebugValue()) + ++MBBICommon; + + assert(MBBICommon != MBBIECommon && + "Reached BB end within common tail length!"); + assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!"); + + if (MBBICommon->mayLoad() || MBBICommon->mayStore()) + if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon)) + MBBICommon->dropMemRefs(); + + ++MBBI; + ++MBBICommon; + } +} + +// See if any of the blocks in MergePotentials (which all have a common single +// successor, or all have no successor) can be tail-merged. If there is a +// successor, any blocks in MergePotentials that are not tail-merged and +// are not immediately before Succ must have an unconditional branch to +// Succ added (but the predecessor/successor lists need no adjustment). +// The lone predecessor of Succ that falls through into Succ, +// if any, is given in PredBB. + +bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB) { + bool MadeChange = false; + + // Except for the special cases below, tail-merge if there are at least + // this many instructions in common. + unsigned minCommonTailLength = TailMergeSize; + + DEBUG(dbgs() << "\nTryTailMergeBlocks: "; + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", "); + dbgs() << "\n"; + if (SuccBB) { + dbgs() << " with successor BB#" << SuccBB->getNumber() << '\n'; + if (PredBB) + dbgs() << " which has fall-through from BB#" + << PredBB->getNumber() << "\n"; + } + dbgs() << "Looking for common tails of at least " + << minCommonTailLength << " instruction" + << (minCommonTailLength == 1 ? "" : "s") << '\n'; + ); + + // Sort by hash value so that blocks with identical end sequences sort + // together. + array_pod_sort(MergePotentials.begin(), MergePotentials.end()); + + // Walk through equivalence sets looking for actual exact matches. + while (MergePotentials.size() > 1) { + unsigned CurHash = MergePotentials.back().getHash(); + + // Build SameTails, identifying the set of blocks with this hash code + // and with the maximum number of instructions in common. + unsigned maxCommonTailLength = ComputeSameTails(CurHash, + minCommonTailLength, + SuccBB, PredBB); + + // If we didn't find any pair that has at least minCommonTailLength + // instructions in common, remove all blocks with this hash code and retry. + if (SameTails.empty()) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + + // If one of the blocks is the entire common tail (and not the entry + // block, which we can't jump to), we can treat all blocks with this same + // tail at once. Use PredBB if that is one of the possibilities, as that + // will not introduce any extra branches. 
+ MachineBasicBlock *EntryBB = + &MergePotentials.front().getBlock()->getParent()->front(); + unsigned commonTailIndex = SameTails.size(); + // If there are two blocks, check to see if one can be made to fall through + // into the other. + if (SameTails.size() == 2 && + SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) && + SameTails[1].tailIsWholeBlock()) + commonTailIndex = 1; + else if (SameTails.size() == 2 && + SameTails[1].getBlock()->isLayoutSuccessor( + SameTails[0].getBlock()) && + SameTails[0].tailIsWholeBlock()) + commonTailIndex = 0; + else { + // Otherwise just pick one, favoring the fall-through predecessor if + // there is one. + for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { + MachineBasicBlock *MBB = SameTails[i].getBlock(); + if (MBB == EntryBB && SameTails[i].tailIsWholeBlock()) + continue; + if (MBB == PredBB) { + commonTailIndex = i; + break; + } + if (SameTails[i].tailIsWholeBlock()) + commonTailIndex = i; + } + } + + if (commonTailIndex == SameTails.size() || + (SameTails[commonTailIndex].getBlock() == PredBB && + !SameTails[commonTailIndex].tailIsWholeBlock())) { + // None of the blocks consist entirely of the common tail. + // Split a block so that one does. + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } + } + + MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); + + // Recompute commont tail MBB's edge weights and block frequency. + setCommonTailEdgeWeights(*MBB); + + // MBB is common tail. Adjust all other BB's to jump to this one. + // Traversal must be forwards so erases work. + DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber() + << " for "); + for (unsigned int i=0, e = SameTails.size(); i != e; ++i) { + if (commonTailIndex == i) + continue; + DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber() + << (i == e-1 ? "" : ", ")); + // Remove MMOs from memory operations as needed. + removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB); + // Hack the end off BB i, making it jump to BB commonTailIndex instead. + ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB); + // BB i is no longer a predecessor of SuccBB; remove it from the worklist. + MergePotentials.erase(SameTails[i].getMPIter()); + } + DEBUG(dbgs() << "\n"); + // We leave commonTailIndex in the worklist in case there are other blocks + // that match it with a smaller number of instructions. + MadeChange = true; + } + return MadeChange; +} + +bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { + bool MadeChange = false; + if (!EnableTailMerge) return MadeChange; + + // First find blocks with no successors. + MergePotentials.clear(); + for (MachineBasicBlock &MBB : MF) { + if (MergePotentials.size() == TailMergeThreshold) + break; + if (!TriedMerging.count(&MBB) && MBB.succ_empty()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(&MBB), &MBB)); + } + + // If this is a large problem, avoid visiting the same basic blocks + // multiple times. + if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + // See if we can do any tail merging on those. + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); + + // Look at blocks (IBB) with multiple predecessors (PBB). 
+ // We change each predecessor to a canonical form, by + // (1) temporarily removing any unconditional branch from the predecessor + // to IBB, and + // (2) alter conditional branches so they branch to the other block + // not IBB; this may require adding back an unconditional branch to IBB + // later, where there wasn't one coming in. E.g. + // Bcc IBB + // fallthrough to QBB + // here becomes + // Bncc QBB + // with a conceptual B to IBB after that, which never actually exists. + // With those changes, we see whether the predecessors' tails match, + // and merge them if so. We change things out of canonical form and + // back to the way they were later in the process. (OptimizeBranches + // would undo some of this, but we can't use it, because we'd get into + // a compile-time infinite loop repeatedly doing and undoing the same + // transformations.) + + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); + I != E; ++I) { + if (I->pred_size() < 2) continue; + SmallPtrSet<MachineBasicBlock *, 8> UniquePreds; + MachineBasicBlock *IBB = &*I; + MachineBasicBlock *PredBB = &*std::prev(I); + MergePotentials.clear(); + for (MachineBasicBlock *PBB : I->predecessors()) { + if (MergePotentials.size() == TailMergeThreshold) + break; + + if (TriedMerging.count(PBB)) + continue; + + // Skip blocks that loop to themselves, can't tail merge these. + if (PBB == IBB) + continue; + + // Visit each predecessor only once. + if (!UniquePreds.insert(PBB).second) + continue; + + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->hasEHPadSuccessor()) + continue; + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { + // Failing case: IBB is the target of a cbr, and we cannot reverse the + // branch. + SmallVector<MachineOperand, 4> NewCond(Cond); + if (!Cond.empty() && TBB == IBB) { + if (TII->ReverseBranchCondition(NewCond)) + continue; + // This is the QBB case described above + if (!FBB) { + auto Next = ++PBB->getIterator(); + if (Next != MF.end()) + FBB = &*Next; + } + } + + // Failing case: the only way IBB can be reached from PBB is via + // exception handling. Happens for landing pads. Would be nice to have + // a bit in the edge so we didn't have to do all this. + if (IBB->isEHPad()) { + MachineFunction::iterator IP = ++PBB->getIterator(); + MachineBasicBlock *PredNextBB = nullptr; + if (IP != MF.end()) + PredNextBB = &*IP; + if (!TBB) { + if (IBB != PredNextBB) // fallthrough + continue; + } else if (FBB) { + if (TBB != IBB && FBB != IBB) // cbr then ubr + continue; + } else if (Cond.empty()) { + if (TBB != IBB) // ubr + continue; + } else { + if (TBB != IBB && IBB != PredNextBB) // cbr + continue; + } + } + + // Remove the unconditional branch at the end, if any. + if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PBB); + if (!Cond.empty()) + // reinsert conditional branch only, for now + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr, + NewCond, dl); + } + + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), PBB)); + } + } + + // If this is a large problem, avoid visiting the same basic blocks multiple + // times. 
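The canonical form described at the top of this comment, reversing a conditional branch that targets IBB so IBB becomes the implicit fall-through while remembering that an unconditional branch may have to be put back, can be shown with a toy terminator. The sketch below is illustrative only; ToyTerminator and canonicalizeFor are invented and merely stand in for what AnalyzeBranch, ReverseBranchCondition and FixTail do on real MachineBasicBlocks.

    // Standalone illustration (invented toy types, not LLVM's branch-analysis API):
    // put a predecessor's terminator into the "canonical form" described above by
    // reversing a conditional branch that targets IBB, so IBB becomes the implicit
    // fall-through that may have to be restored later.
    #include <iostream>
    #include <string>
    #include <utility>

    struct ToyTerminator {
      std::string CondTarget;   // taken when the condition holds
      std::string FallThrough;  // reached otherwise
      bool Reversed = false;    // has the condition sense been flipped?
    };

    // Returns true if the terminator was rewritten, meaning an unconditional
    // branch to IBB must be re-inserted should tail merging leave this block alone.
    static bool canonicalizeFor(ToyTerminator &T, const std::string &IBB) {
      if (T.CondTarget != IBB)
        return false;                 // already in the desired shape
      std::swap(T.CondTarget, T.FallThrough);
      T.Reversed = !T.Reversed;       // "Bcc IBB" becomes "Bncc QBB"
      return true;                    // caller owes IBB a branch if merging fails
    }

    int main() {
      ToyTerminator T{"IBB", "QBB"};
      bool NeedsFixTail = canonicalizeFor(T, "IBB");
      std::cout << (T.Reversed ? "Bncc " : "Bcc ") << T.CondTarget
                << "  (fallthrough " << T.FallThrough << ", fix tail: "
                << std::boolalpha << NeedsFixTail << ")\n";
    }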
+ if (MergePotentials.size() == TailMergeThreshold) + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i) + TriedMerging.insert(MergePotentials[i].getBlock()); + + if (MergePotentials.size() >= 2) + MadeChange |= TryTailMergeBlocks(IBB, PredBB); + + // Reinsert an unconditional branch if needed. The 1 below can occur as a + // result of removing blocks in TryTailMergeBlocks. + PredBB = &*std::prev(I); // this may have been changed in TryTailMergeBlocks + if (MergePotentials.size() == 1 && + MergePotentials.begin()->getBlock() != PredBB) + FixTail(MergePotentials.begin()->getBlock(), IBB, TII); + } + + return MadeChange; +} + +void BranchFolder::setCommonTailEdgeWeights(MachineBasicBlock &TailMBB) { + SmallVector<BlockFrequency, 2> EdgeFreqLs(TailMBB.succ_size()); + BlockFrequency AccumulatedMBBFreq; + + // Aggregate edge frequency of successor edge j: + // edgeFreq(j) = sum (freq(bb) * edgeProb(bb, j)), + // where bb is a basic block that is in SameTails. + for (const auto &Src : SameTails) { + const MachineBasicBlock *SrcMBB = Src.getBlock(); + BlockFrequency BlockFreq = MBBFreqInfo.getBlockFreq(SrcMBB); + AccumulatedMBBFreq += BlockFreq; + + // It is not necessary to recompute edge weights if TailBB has less than two + // successors. + if (TailMBB.succ_size() <= 1) + continue; + + auto EdgeFreq = EdgeFreqLs.begin(); + + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) + *EdgeFreq += BlockFreq * MBPI.getEdgeProbability(SrcMBB, *SuccI); + } + + MBBFreqInfo.setBlockFreq(&TailMBB, AccumulatedMBBFreq); + + if (TailMBB.succ_size() <= 1) + return; + + auto SumEdgeFreq = + std::accumulate(EdgeFreqLs.begin(), EdgeFreqLs.end(), BlockFrequency(0)) + .getFrequency(); + auto EdgeFreq = EdgeFreqLs.begin(); + + if (SumEdgeFreq > 0) { + for (auto SuccI = TailMBB.succ_begin(), SuccE = TailMBB.succ_end(); + SuccI != SuccE; ++SuccI, ++EdgeFreq) { + auto Prob = BranchProbability::getBranchProbability( + EdgeFreq->getFrequency(), SumEdgeFreq); + TailMBB.setSuccProbability(SuccI, Prob); + } + } +} + +//===----------------------------------------------------------------------===// +// Branch Optimization +//===----------------------------------------------------------------------===// + +bool BranchFolder::OptimizeBranches(MachineFunction &MF) { + bool MadeChange = false; + + // Make sure blocks are numbered in order + MF.RenumberBlocks(); + // Renumbering blocks alters funclet membership, recalculate it. + FuncletMembership = getFuncletMembership(MF); + + for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); + I != E; ) { + MachineBasicBlock *MBB = &*I++; + MadeChange |= OptimizeBlock(MBB); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + + return MadeChange; +} + +// Blocks should be considered empty if they contain only debug info; +// else the debug info would affect codegen. +static bool IsEmptyBlock(MachineBasicBlock *MBB) { + return MBB->getFirstNonDebugInstr() == MBB->end(); +} + +// Blocks with only debug info and branches should be considered the same +// as blocks with only branches. +static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) { + MachineBasicBlock::iterator I = MBB->getFirstNonDebugInstr(); + assert(I != MBB->end() && "empty block!"); + return I->isBranch(); +} + +/// IsBetterFallthrough - Return true if it would be clearly better to +/// fall-through to MBB1 than to fall through into MBB2. 
This has to return +/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will +/// result in infinite loops. +static bool IsBetterFallthrough(MachineBasicBlock *MBB1, + MachineBasicBlock *MBB2) { + // Right now, we use a simple heuristic. If MBB2 ends with a call, and + // MBB1 doesn't, we prefer to fall through into MBB1. This allows us to + // optimize branches that branch to either a return block or an assert block + // into a fallthrough to the return. + MachineBasicBlock::iterator MBB1I = MBB1->getLastNonDebugInstr(); + MachineBasicBlock::iterator MBB2I = MBB2->getLastNonDebugInstr(); + if (MBB1I == MBB1->end() || MBB2I == MBB2->end()) + return false; + + // If there is a clear successor ordering we make sure that one block + // will fall through to the next + if (MBB1->isSuccessor(MBB2)) return true; + if (MBB2->isSuccessor(MBB1)) return false; + + return MBB2I->isCall() && !MBB1I->isCall(); +} + +/// getBranchDebugLoc - Find and return, if any, the DebugLoc of the branch +/// instructions on the block. +static DebugLoc getBranchDebugLoc(MachineBasicBlock &MBB) { + MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr(); + if (I != MBB.end() && I->isBranch()) + return I->getDebugLoc(); + return DebugLoc(); +} + +/// OptimizeBlock - Analyze and optimize control flow related to the specified +/// block. This is never called on the entry block. +bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { + bool MadeChange = false; + MachineFunction &MF = *MBB->getParent(); +ReoptimizeBlock: + + MachineFunction::iterator FallThrough = MBB->getIterator(); + ++FallThrough; + + // Make sure MBB and FallThrough belong to the same funclet. + bool SameFunclet = true; + if (!FuncletMembership.empty() && FallThrough != MF.end()) { + auto MBBFunclet = FuncletMembership.find(MBB); + assert(MBBFunclet != FuncletMembership.end()); + auto FallThroughFunclet = FuncletMembership.find(&*FallThrough); + assert(FallThroughFunclet != FuncletMembership.end()); + SameFunclet = MBBFunclet->second == FallThroughFunclet->second; + } + + // If this block is empty, make everyone use its fall-through, not the block + // explicitly. Landing pads should not do this since the landing-pad table + // points to this block. Blocks with their addresses taken shouldn't be + // optimized away. + if (IsEmptyBlock(MBB) && !MBB->isEHPad() && !MBB->hasAddressTaken() && + SameFunclet) { + // Dead block? Leave for cleanup later. + if (MBB->pred_empty()) return MadeChange; + + if (FallThrough == MF.end()) { + // TODO: Simplify preds to not branch here if possible! + } else if (FallThrough->isEHPad()) { + // Don't rewrite to a landing pad fallthough. That could lead to the case + // where a BB jumps to more than one landing pad. + // TODO: Is it ever worth rewriting predecessors which don't already + // jump to a landing pad, and so can safely jump to the fallthrough? + } else { + // Rewrite all predecessors of the old block to go to the fallthrough + // instead. + while (!MBB->pred_empty()) { + MachineBasicBlock *Pred = *(MBB->pred_end()-1); + Pred->ReplaceUsesOfBlockWith(MBB, &*FallThrough); + } + // If MBB was the target of a jump table, update jump tables to go to the + // fallthrough instead. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, &*FallThrough); + MadeChange = true; + } + return MadeChange; + } + + // Check to see if we can simplify the terminator of the block before this + // one. 
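IsBetterFallthrough above encodes one preference: when choosing which of two blocks to fall into, favor the one whose last real instruction is not a call, so return-style tails stay reachable by a plain fall-through. The standalone sketch below reproduces just that comparison and omits the successor-ordering checks; ToyBlock and betterFallthrough are invented names, not LLVM APIs.

    // Standalone illustration of the fall-through preference above (toy types,
    // not MachineBasicBlock): prefer to fall into a block whose last instruction
    // is not a call.
    #include <iostream>
    #include <vector>

    struct ToyBlock {
      std::vector<bool> InstIsCall;  // one "is this a call?" flag per instruction
      bool endsInCall() const {
        return !InstIsCall.empty() && InstIsCall.back();
      }
    };

    // Mirrors the shape of the heuristic: a strict preference, never symmetric.
    static bool betterFallthrough(const ToyBlock &B1, const ToyBlock &B2) {
      if (B1.InstIsCall.empty() || B2.InstIsCall.empty())
        return false;
      return B2.endsInCall() && !B1.endsInCall();
    }

    int main() {
      ToyBlock ReturnBlock{{false, false}};  // e.g. "mov r0, #0; ret"
      ToyBlock AssertBlock{{false, true}};   // ends in a call such as abort()
      std::cout << std::boolalpha
                << betterFallthrough(ReturnBlock, AssertBlock) << ' '   // true
                << betterFallthrough(AssertBlock, ReturnBlock) << '\n'; // false
    }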
+ MachineBasicBlock &PrevBB = *std::prev(MachineFunction::iterator(MBB)); + + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; + SmallVector<MachineOperand, 4> PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + if (!PriorUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB, + !PriorCond.empty()); + + // If the previous branch is conditional and both conditions go to the same + // destination, remove the branch, replacing it with an unconditional one or + // a fall-through. + if (PriorTBB && PriorTBB == PriorFBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + PriorCond.clear(); + if (PriorTBB != MBB) + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the previous block unconditionally falls through to this block and + // this block has no other predecessors, move the contents of this block + // into the prior block. This doesn't usually happen when SimplifyCFG + // has been used, but it can happen if tail merging splits a fall-through + // predecessor of a block. + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && + PrevBB.succ_size() == 1 && + !MBB->hasAddressTaken() && !MBB->isEHPad()) { + DEBUG(dbgs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *MBB); + // Remove redundant DBG_VALUEs first. + if (PrevBB.begin() != PrevBB.end()) { + MachineBasicBlock::iterator PrevBBIter = PrevBB.end(); + --PrevBBIter; + MachineBasicBlock::iterator MBBIter = MBB->begin(); + // Check if DBG_VALUE at the end of PrevBB is identical to the + // DBG_VALUE at the beginning of MBB. + while (PrevBBIter != PrevBB.begin() && MBBIter != MBB->end() + && PrevBBIter->isDebugValue() && MBBIter->isDebugValue()) { + if (!MBBIter->isIdenticalTo(PrevBBIter)) + break; + MachineInstr *DuplicateDbg = MBBIter; + ++MBBIter; -- PrevBBIter; + DuplicateDbg->eraseFromParent(); + } + } + PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin()); + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(MBB); + MadeChange = true; + return MadeChange; + } + + // If the previous branch *only* branches to *this* block (conditional or + // not) remove the branch. + if (PriorTBB == MBB && !PriorFBB) { + TII->RemoveBranch(PrevBB); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches somewhere else on the condition and here if + // the condition is false, remove the uncond second branch. + if (PriorFBB == MBB) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + + // If the prior block branches here on true and somewhere else on false, and + // if the branch condition is reversible, reverse the branch to create a + // fall-through. 
+ if (PriorTBB == MBB) { + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this block has no successors (e.g. it is a return block or ends with + // a call to a no-return function like abort or __cxa_throw) and if the pred + // falls through into this block, and if it would otherwise fall through + // into the block after this, move this block to the end of the function. + // + // We consider it more likely that execution will stay in the function (e.g. + // due to loops) than it is to exit it. This asserts in loops etc, moving + // the assert condition out of the loop body. + if (MBB->succ_empty() && !PriorCond.empty() && !PriorFBB && + MachineFunction::iterator(PriorTBB) == FallThrough && + !MBB->canFallThrough()) { + bool DoTransform = true; + + // We have to be careful that the succs of PredBB aren't both no-successor + // blocks. If neither have successors and if PredBB is the second from + // last block in the function, we'd just keep swapping the two blocks for + // last. Only do the swap if one is clearly better to fall through than + // the other. + if (FallThrough == --MF.end() && + !IsBetterFallthrough(PriorTBB, MBB)) + DoTransform = false; + + if (DoTransform) { + // Reverse the branch so we will fall through on the previous true cond. + SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); + if (!TII->ReverseBranchCondition(NewPriorCond)) { + DEBUG(dbgs() << "\nMoving MBB: " << *MBB + << "To make fallthrough to: " << *PriorTBB << "\n"); + + DebugLoc dl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl); + + // Move this block to the end of the function. + MBB->moveAfter(&MF.back()); + MadeChange = true; + ++NumBranchOpts; + return MadeChange; + } + } + } + } + + // Analyze the branch in the current block. + MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr; + SmallVector<MachineOperand, 4> CurCond; + bool CurUnAnalyzable= TII->AnalyzeBranch(*MBB, CurTBB, CurFBB, CurCond, true); + if (!CurUnAnalyzable) { + // If the CFG for the prior block has extra edges, remove them. + MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty()); + + // If this is a two-way branch, and the FBB branches to this block, reverse + // the condition so the single-basic-block loop is faster. Instead of: + // Loop: xxx; jcc Out; jmp Loop + // we want: + // Loop: xxx; jncc Loop; jmp Out + if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { + SmallVector<MachineOperand, 4> NewCond(CurCond); + if (!TII->ReverseBranchCondition(NewCond)) { + DebugLoc dl = getBranchDebugLoc(*MBB); + TII->RemoveBranch(*MBB); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); + MadeChange = true; + ++NumBranchOpts; + goto ReoptimizeBlock; + } + } + + // If this branch is the only thing in its block, see if we can forward + // other blocks across it. + if (CurTBB && CurCond.empty() && !CurFBB && + IsBranchOnlyBlock(MBB) && CurTBB != MBB && + !MBB->hasAddressTaken() && !MBB->isEHPad()) { + DebugLoc dl = getBranchDebugLoc(*MBB); + // This block may contain just an unconditional branch. Because there can + // be 'non-branch terminators' in the block, try removing the branch and + // then seeing if the block is empty. 
+ TII->RemoveBranch(*MBB); + // If the only things remaining in the block are debug info, remove these + // as well, so this will behave the same as an empty block in non-debug + // mode. + if (IsEmptyBlock(MBB)) { + // Make the block empty, losing the debug info (we could probably + // improve this in some cases.) + MBB->erase(MBB->begin(), MBB->end()); + } + // If this block is just an unconditional branch to CurTBB, we can + // usually completely eliminate the block. The only case we cannot + // completely eliminate the block is when the block before this one + // falls through into MBB and we can't understand the prior block's branch + // condition. + if (MBB->empty()) { + bool PredHasNoFallThrough = !PrevBB.canFallThrough(); + if (PredHasNoFallThrough || !PriorUnAnalyzable || + !PrevBB.isSuccessor(MBB)) { + // If the prior block falls through into us, turn it into an + // explicit branch to us to make updates simpler. + if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) && + PriorTBB != MBB && PriorFBB != MBB) { + if (!PriorTBB) { + assert(PriorCond.empty() && !PriorFBB && + "Bad branch analysis"); + PriorTBB = MBB; + } else { + assert(!PriorFBB && "Machine CFG out of date!"); + PriorFBB = MBB; + } + DebugLoc pdl = getBranchDebugLoc(PrevBB); + TII->RemoveBranch(PrevBB); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl); + } + + // Iterate through all the predecessors, revectoring each in-turn. + size_t PI = 0; + bool DidChange = false; + bool HasBranchToSelf = false; + while(PI != MBB->pred_size()) { + MachineBasicBlock *PMBB = *(MBB->pred_begin() + PI); + if (PMBB == MBB) { + // If this block has an uncond branch to itself, leave it. + ++PI; + HasBranchToSelf = true; + } else { + DidChange = true; + PMBB->ReplaceUsesOfBlockWith(MBB, CurTBB); + // If this change resulted in PMBB ending in a conditional + // branch where both conditions go to the same destination, + // change this to an unconditional branch (and fix the CFG). + MachineBasicBlock *NewCurTBB = nullptr, *NewCurFBB = nullptr; + SmallVector<MachineOperand, 4> NewCurCond; + bool NewCurUnAnalyzable = TII->AnalyzeBranch(*PMBB, NewCurTBB, + NewCurFBB, NewCurCond, true); + if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { + DebugLoc pdl = getBranchDebugLoc(*PMBB); + TII->RemoveBranch(*PMBB); + NewCurCond.clear(); + TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl); + MadeChange = true; + ++NumBranchOpts; + PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false); + } + } + } + + // Change any jumptables to go to the new MBB. + if (MachineJumpTableInfo *MJTI = MF.getJumpTableInfo()) + MJTI->ReplaceMBBInJumpTables(MBB, CurTBB); + if (DidChange) { + ++NumBranchOpts; + MadeChange = true; + if (!HasBranchToSelf) return MadeChange; + } + } + } + + // Add the branch back if the block is more than just an uncond branch. + TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl); + } + } + + // If the prior block doesn't fall through into this block, and if this + // block doesn't fall through into some other block, see if we can find a + // place to move this block where a fall-through will happen. + if (!PrevBB.canFallThrough()) { + + // Now we know that there was no fall-through into this block, check to + // see if it has a fall-through into its successor. + bool CurFallsThru = MBB->canFallThrough(); + + if (!MBB->isEHPad()) { + // Check all the predecessors of this block. If one of them has no fall + // throughs, move this block right after it. 
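The revectoring loop above re-points every predecessor of a branch-only block directly at that branch's target, after which the emptied block is usually left dead. A small standalone sketch of the same rewiring on a toy CFG follows; ToyCFG and forwardAcross are invented, and the real code works through ReplaceUsesOfBlockWith plus jump-table updates rather than a string map.

    // Standalone illustration (toy CFG, not MachineBasicBlock): once a block is
    // nothing but "jmp Target", every predecessor can be re-pointed at Target
    // directly, after which the empty block usually becomes dead.
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    using ToyCFG = std::map<std::string, std::vector<std::string>>;  // block -> successors

    static void forwardAcross(ToyCFG &CFG, const std::string &Empty,
                              const std::string &Target) {
      for (auto &Entry : CFG) {
        if (Entry.first == Empty)
          continue;                 // keep self-loops out of this toy
        for (std::string &Succ : Entry.second)
          if (Succ == Empty)
            Succ = Target;          // analogous to ReplaceUsesOfBlockWith
      }
      CFG[Empty].clear();           // the trampoline block now serves no purpose
    }

    int main() {
      ToyCFG CFG = {{"entry", {"trampoline"}},
                    {"side", {"trampoline"}},
                    {"trampoline", {"exit"}},
                    {"exit", {}}};
      forwardAcross(CFG, "trampoline", "exit");
      for (const auto &Entry : CFG) {
        std::cout << Entry.first << " ->";
        for (const std::string &S : Entry.second)
          std::cout << ' ' << S;
        std::cout << '\n';
      }
    }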
+ for (MachineBasicBlock *PredBB : MBB->predecessors()) { + // Analyze the branch at the end of the pred. + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (PredBB != MBB && !PredBB->canFallThrough() && + !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) + && (!CurFallsThru || !CurTBB || !CurFBB) + && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { + // If the current block doesn't fall through, just move it. + // If the current block can fall through and does not end with a + // conditional branch, we need to append an unconditional jump to + // the (current) next block. To avoid a possible compile-time + // infinite loop, move blocks only backward in this case. + // Also, if there are already 2 branches here, we cannot add a third; + // this means we have the case + // Bcc next + // B elsewhere + // next: + if (CurFallsThru) { + MachineBasicBlock *NextBB = &*std::next(MBB->getIterator()); + CurCond.clear(); + TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc()); + } + MBB->moveAfter(PredBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + } + + if (!CurFallsThru) { + // Check all successors to see if we can move this block before it. + for (MachineBasicBlock *SuccBB : MBB->successors()) { + // Analyze the branch at the end of the block before the succ. + MachineFunction::iterator SuccPrev = --SuccBB->getIterator(); + + // If this block doesn't already fall-through to that successor, and if + // the succ doesn't already have a block that can fall through into it, + // and if the successor isn't an EH destination, we can arrange for the + // fallthrough to happen. + if (SuccBB != MBB && &*SuccPrev != MBB && + !SuccPrev->canFallThrough() && !CurUnAnalyzable && + !SuccBB->isEHPad()) { + MBB->moveBefore(SuccBB); + MadeChange = true; + goto ReoptimizeBlock; + } + } + + // Okay, there is no really great place to put this block. If, however, + // the block before this one would be a fall-through if this block were + // removed, move this block to the end of the function. + MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; + SmallVector<MachineOperand, 4> PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. + if (FallThrough != MF.end() && + !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && + PrevBB.isSuccessor(&*FallThrough)) { + MBB->moveAfter(&MF.back()); + MadeChange = true; + return MadeChange; + } + } + } + + return MadeChange; +} + +//===----------------------------------------------------------------------===// +// Hoist Common Code +//===----------------------------------------------------------------------===// + +/// HoistCommonCode - Hoist common instruction sequences at the start of basic +/// blocks to their common predecessor. +bool BranchFolder::HoistCommonCode(MachineFunction &MF) { + bool MadeChange = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = &*I++; + MadeChange |= HoistCommonCodeInSuccs(MBB); + } + + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. 
Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock *SuccBB : BB->successors()) + if (SuccBB != TrueBB) + return SuccBB; + return nullptr; +} + +template <class Container> +static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI, + Container &Set) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Set.insert(*AI); + } else { + Set.insert(Reg); + } +} + +/// findHoistingInsertPosAndDeps - Find the location to move common instructions +/// in successors to. The location is usually just before the terminator, +/// however if the terminator is a conditional branch and its previous +/// instruction is the flag setting instruction, the previous instruction is +/// the preferred location. This function also gathers uses and defs of the +/// instructions from the insertion point to the end of the block. The data is +/// used by HoistCommonCodeInSuccs to ensure safety. +static +MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + SmallSet<unsigned,4> &Uses, + SmallSet<unsigned,4> &Defs) { + MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); + if (!TII->isUnpredicatedTerminator(Loc)) + return MBB->end(); + + for (const MachineOperand &MO : Loc->operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + addRegAndItsAliases(Reg, TRI, Uses); + } else { + if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + + // If the terminator defines a register, make sure we don't hoist + // the instruction whose def might be clobbered by the terminator. + addRegAndItsAliases(Reg, TRI, Defs); + } + } + + if (Uses.empty()) + return Loc; + if (Loc == MBB->begin()) + return MBB->end(); + + // The terminator is probably a conditional branch, try not to separate the + // branch from condition setting instruction. + MachineBasicBlock::iterator PI = Loc; + --PI; + while (PI != MBB->begin() && PI->isDebugValue()) + --PI; + + bool IsDef = false; + for (const MachineOperand &MO : PI->operands()) { + // If PI has a regmask operand, it is probably a call. Separate away. + if (MO.isRegMask()) + return Loc; + if (!MO.isReg() || MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (Uses.count(Reg)) { + IsDef = true; + break; + } + } + if (!IsDef) + // The condition setting instruction is not just before the conditional + // branch. + return Loc; + + // Be conservative, don't insert instruction above something that may have + // side-effects. And since it's potentially bad to separate flag setting + // instruction from the conditional branch, just abort the optimization + // completely. + // Also avoid moving code above predicated instruction since it's hard to + // reason about register liveness with predicated instruction. + bool DontMoveAcrossStore = true; + if (!PI->isSafeToMove(nullptr, DontMoveAcrossStore) || TII->isPredicated(PI)) + return MBB->end(); + + + // Find out what registers are live. Note this routine is ignoring other live + // registers which are only used by instructions in successor blocks. 
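findHoistingInsertPosAndDeps above collects which registers are used and defined from the insertion point to the end of the block precisely so that HoistCommonCodeInSuccs can reject a candidate whose operands collide with them. The sketch below shows that collision test on plain integer register ids; it ignores sub-register and alias expansion, and ToyInst and safeToHoist are invented names rather than LLVM APIs.

    // Standalone sketch (plain integers as register ids, no alias expansion):
    // gather what the instructions at and after the insertion point use and
    // define, then refuse to hoist a candidate whose operands collide with them.
    #include <iostream>
    #include <set>

    struct ToyInst {
      std::set<int> Uses, Defs;
    };

    static bool safeToHoist(const ToyInst &Candidate,
                            const std::set<int> &UsesAtLoc,
                            const std::set<int> &DefsAtLoc) {
      for (int R : Candidate.Defs)
        if (UsesAtLoc.count(R) || DefsAtLoc.count(R))
          return false;  // would clobber a register still needed at the loc
      for (int R : Candidate.Uses)
        if (DefsAtLoc.count(R))
          return false;  // its input is rewritten at the insertion point
      return true;
    }

    int main() {
      // Suppose the terminator region reads r1 (a condition register) and defines r7.
      std::set<int> UsesAtLoc = {1}, DefsAtLoc = {7};
      ToyInst Add{{2, 3}, {4}};  // r4 = r2 + r3 : safe to hoist
      ToyInst Cmp{{2, 3}, {1}};  // redefines r1 : unsafe
      std::cout << std::boolalpha << safeToHoist(Add, UsesAtLoc, DefsAtLoc) << ' '
                << safeToHoist(Cmp, UsesAtLoc, DefsAtLoc) << '\n';
    }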
+ for (const MachineOperand &MO : PI->operands()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isUse()) { + addRegAndItsAliases(Reg, TRI, Uses); + } else { + if (Uses.erase(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + Uses.erase(*SubRegs); // Use sub-registers to be conservative + } + } + addRegAndItsAliases(Reg, TRI, Defs); + } + } + + return PI; +} + +/// HoistCommonCodeInSuccs - If the successors of MBB has common instruction +/// sequence at the start of the function, move the instructions before MBB +/// terminator if it's legal. +bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond, true) || !TBB || Cond.empty()) + return false; + + if (!FBB) FBB = findFalseBlock(MBB, TBB); + if (!FBB) + // Malformed bcc? True and false blocks are the same? + return false; + + // Restrict the optimization to cases where MBB is the only predecessor, + // it is an obvious win. + if (TBB->pred_size() > 1 || FBB->pred_size() > 1) + return false; + + // Find a suitable position to hoist the common instructions to. Also figure + // out which registers are used or defined by instructions from the insertion + // point to the end of the block. + SmallSet<unsigned, 4> Uses, Defs; + MachineBasicBlock::iterator Loc = + findHoistingInsertPosAndDeps(MBB, TII, TRI, Uses, Defs); + if (Loc == MBB->end()) + return false; + + bool HasDups = false; + SmallVector<unsigned, 4> LocalDefs; + SmallSet<unsigned, 4> LocalDefsSet; + MachineBasicBlock::iterator TIB = TBB->begin(); + MachineBasicBlock::iterator FIB = FBB->begin(); + MachineBasicBlock::iterator TIE = TBB->end(); + MachineBasicBlock::iterator FIE = FBB->end(); + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. + if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(FIB, MachineInstr::CheckKillDead)) + break; + + if (TII->isPredicated(TIB)) + // Hard to reason about register liveness with predicated instruction. + break; + + bool IsSafe = true; + for (MachineOperand &MO : TIB->operands()) { + // Don't attempt to hoist instructions with register masks. + if (MO.isRegMask()) { + IsSafe = false; + break; + } + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + if (Uses.count(Reg)) { + // Avoid clobbering a register that's used by the instruction at + // the point of insertion. + IsSafe = false; + break; + } + + if (Defs.count(Reg) && !MO.isDead()) { + // Don't hoist the instruction if the def would be clobber by the + // instruction at the point insertion. FIXME: This is overly + // conservative. It should be possible to hoist the instructions + // in BB2 in the following example: + // BB1: + // r1, eflag = op1 r2, r3 + // brcc eflag + // + // BB2: + // r1 = op2, ... + // = op3, r1<kill> + IsSafe = false; + break; + } + } else if (!LocalDefsSet.count(Reg)) { + if (Defs.count(Reg)) { + // Use is defined by the instruction at the point of insertion. 
+ IsSafe = false; + break; + } + + if (MO.isKill() && Uses.count(Reg)) + // Kills a register that's read by the instruction at the point of + // insertion. Remove the kill marker. + MO.setIsKill(false); + } + } + if (!IsSafe) + break; + + bool DontMoveAcrossStore = true; + if (!TIB->isSafeToMove(nullptr, DontMoveAcrossStore)) + break; + + // Remove kills from LocalDefsSet, these registers had short live ranges. + for (const MachineOperand &MO : TIB->operands()) { + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || !LocalDefsSet.count(Reg)) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LocalDefsSet.erase(*AI); + } else { + LocalDefsSet.erase(Reg); + } + } + + // Track local defs so we can update liveins. + for (const MachineOperand &MO : TIB->operands()) { + if (!MO.isReg() || !MO.isDef() || MO.isDead()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + LocalDefs.push_back(Reg); + addRegAndItsAliases(Reg, TRI, LocalDefsSet); + } + + HasDups = true; + ++TIB; + ++FIB; + } + + if (!HasDups) + return false; + + MBB->splice(Loc, TBB, TBB->begin(), TIB); + FBB->erase(FBB->begin(), FIB); + + // Update livein's. + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Def = LocalDefs[i]; + if (LocalDefsSet.count(Def)) { + TBB->addLiveIn(Def); + FBB->addLiveIn(Def); + } + } + + ++NumHoist; + return true; +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h new file mode 100644 index 0000000..d759d53 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -0,0 +1,148 @@ +//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_BRANCHFOLDING_H +#define LLVM_LIB_CODEGEN_BRANCHFOLDING_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/Support/BlockFrequency.h" +#include <vector> + +namespace llvm { + class MachineBlockFrequencyInfo; + class MachineBranchProbabilityInfo; + class MachineFunction; + class MachineModuleInfo; + class RegScavenger; + class TargetInstrInfo; + class TargetRegisterInfo; + + class LLVM_LIBRARY_VISIBILITY BranchFolder { + public: + explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist, + const MachineBlockFrequencyInfo &MBFI, + const MachineBranchProbabilityInfo &MBPI); + + bool OptimizeFunction(MachineFunction &MF, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + MachineModuleInfo *mmi); + private: + class MergePotentialsElt { + unsigned Hash; + MachineBasicBlock *Block; + public: + MergePotentialsElt(unsigned h, MachineBasicBlock *b) + : Hash(h), Block(b) {} + + unsigned getHash() const { return Hash; } + MachineBasicBlock *getBlock() const { return Block; } + + void setBlock(MachineBasicBlock *MBB) { + Block = MBB; + } + + bool operator<(const MergePotentialsElt &) const; + }; + typedef std::vector<MergePotentialsElt>::iterator MPIterator; + std::vector<MergePotentialsElt> MergePotentials; + SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging; + DenseMap<const MachineBasicBlock *, int> FuncletMembership; + + class SameTailElt { + MPIterator MPIter; + MachineBasicBlock::iterator TailStartPos; + public: + SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp) + : MPIter(mp), TailStartPos(tsp) {} + + MPIterator getMPIter() const { + return MPIter; + } + MergePotentialsElt &getMergePotentialsElt() const { + return *getMPIter(); + } + MachineBasicBlock::iterator getTailStartPos() const { + return TailStartPos; + } + unsigned getHash() const { + return getMergePotentialsElt().getHash(); + } + MachineBasicBlock *getBlock() const { + return getMergePotentialsElt().getBlock(); + } + bool tailIsWholeBlock() const { + return TailStartPos == getBlock()->begin(); + } + + void setBlock(MachineBasicBlock *MBB) { + getMergePotentialsElt().setBlock(MBB); + } + void setTailStartPos(MachineBasicBlock::iterator Pos) { + TailStartPos = Pos; + } + }; + std::vector<SameTailElt> SameTails; + + bool EnableTailMerge; + bool EnableHoistCommonCode; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineModuleInfo *MMI; + RegScavenger *RS; + + /// \brief This class keeps track of branch frequencies of newly created + /// blocks and tail-merged blocks. 
+ class MBFIWrapper { + public: + MBFIWrapper(const MachineBlockFrequencyInfo &I) : MBFI(I) {} + BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const; + void setBlockFreq(const MachineBasicBlock *MBB, BlockFrequency F); + + private: + const MachineBlockFrequencyInfo &MBFI; + DenseMap<const MachineBasicBlock *, BlockFrequency> MergedBBFreq; + }; + + MBFIWrapper MBBFreqInfo; + const MachineBranchProbabilityInfo &MBPI; + + bool TailMergeBlocks(MachineFunction &MF); + bool TryTailMergeBlocks(MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB); + void MaintainLiveIns(MachineBasicBlock *CurMBB, + MachineBasicBlock *NewMBB); + void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, + MachineBasicBlock *NewDest); + MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); + unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, + MachineBasicBlock *SuccBB, + MachineBasicBlock *PredBB); + void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, + MachineBasicBlock* PredBB); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); + + bool OptimizeBranches(MachineFunction &MF); + bool OptimizeBlock(MachineBasicBlock *MBB); + void RemoveDeadBlock(MachineBasicBlock *MBB); + bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); + + bool HoistCommonCode(MachineFunction &MF); + bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB); + }; +} + +#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */ diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp new file mode 100644 index 0000000..abc655a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -0,0 +1,230 @@ +//===------------------------ CalcSpillWeights.cpp ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "calcspillweights" + +void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, + MachineFunction &MF, + VirtRegMap *VRM, + const MachineLoopInfo &MLI, + const MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo::NormalizingFn norm) { + DEBUG(dbgs() << "********** Compute Spill Weights **********\n" + << "********** Function: " << MF.getName() << '\n'); + + MachineRegisterInfo &MRI = MF.getRegInfo(); + VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm); + for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI.reg_nodbg_empty(Reg)) + continue; + VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); + } +} + +// Return the preferred allocation register for reg, given a COPY instruction. +static unsigned copyHint(const MachineInstr *mi, unsigned reg, + const TargetRegisterInfo &tri, + const MachineRegisterInfo &mri) { + unsigned sub, hreg, hsub; + if (mi->getOperand(0).getReg() == reg) { + sub = mi->getOperand(0).getSubReg(); + hreg = mi->getOperand(1).getReg(); + hsub = mi->getOperand(1).getSubReg(); + } else { + sub = mi->getOperand(1).getSubReg(); + hreg = mi->getOperand(0).getReg(); + hsub = mi->getOperand(0).getSubReg(); + } + + if (!hreg) + return 0; + + if (TargetRegisterInfo::isVirtualRegister(hreg)) + return sub == hsub ? hreg : 0; + + const TargetRegisterClass *rc = mri.getRegClass(reg); + + // Only allow physreg hints in rc. + if (sub == 0) + return rc->contains(hreg) ? hreg : 0; + + // reg:sub should match the physreg hreg. + return tri.getMatchingSuperReg(hreg, sub, rc); +} + +// Check if all values in LI are rematerializable +static bool isRematerializable(const LiveInterval &LI, + const LiveIntervals &LIS, + VirtRegMap *VRM, + const TargetInstrInfo &TII) { + unsigned Reg = LI.reg; + unsigned Original = VRM ? VRM->getOriginal(Reg) : 0; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + const VNInfo *VNI = *I; + if (VNI->isUnused()) + continue; + if (VNI->isPHIDef()) + return false; + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + + // Trace copies introduced by live range splitting. The inline + // spiller can rematerialize through these copies, so the spill + // weight must reflect this. + if (VRM) { + while (MI->isFullCopy()) { + // The copy destination must match the interval register. + if (MI->getOperand(0).getReg() != Reg) + return false; + + // Get the source register. + Reg = MI->getOperand(1).getReg(); + + // If the original (pre-splitting) registers match this + // copy came from a split. + if (!TargetRegisterInfo::isVirtualRegister(Reg) || + VRM->getOriginal(Reg) != Original) + return false; + + // Follow the copy live-in value. 
+ const LiveInterval &SrcLI = LIS.getInterval(Reg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + VNI = SrcQ.valueIn(); + assert(VNI && "Copy from non-existing value"); + if (VNI->isPHIDef()) + return false; + MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Dead valno in interval"); + } + } + + if (!TII.isTriviallyReMaterializable(MI, LIS.getAliasAnalysis())) + return false; + } + return true; +} + +void +VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getSubtarget().getRegisterInfo(); + MachineBasicBlock *mbb = nullptr; + MachineLoop *loop = nullptr; + bool isExiting = false; + float totalWeight = 0; + unsigned numInstr = 0; // Number of instructions using li + SmallPtrSet<MachineInstr*, 8> visited; + + // Find the best physreg hint and the best virtreg hint. + float bestPhys = 0, bestVirt = 0; + unsigned hintPhys = 0, hintVirt = 0; + + // Don't recompute a target specific hint. + bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + + // Don't recompute spill weight for an unspillable register. + bool Spillable = li.isSpillable(); + + for (MachineRegisterInfo::reg_instr_iterator + I = mri.reg_instr_begin(li.reg), E = mri.reg_instr_end(); + I != E; ) { + MachineInstr *mi = &*(I++); + numInstr++; + if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) + continue; + if (!visited.insert(mi).second) + continue; + + float weight = 1.0f; + if (Spillable) { + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = Loops.getLoopFor(mbb); + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + std::tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + weight = LiveIntervals::getSpillWeight( + writes, reads, &MBFI, mi); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; + } + + // Get allocation hints from copies. + if (noHint || !mi->isCopy()) + continue; + unsigned hint = copyHint(mi, li.reg, tri, mri); + if (!hint) + continue; + // Force hweight onto the stack so that x86 doesn't add hidden precision, + // making the comparison incorrectly pass (i.e., 1 > 1 == true??). + // + // FIXME: we probably shouldn't use floats at all. + volatile float hweight = Hint[hint] += weight; + if (TargetRegisterInfo::isPhysicalRegister(hint)) { + if (hweight > bestPhys && mri.isAllocatable(hint)) + bestPhys = hweight, hintPhys = hint; + } else { + if (hweight > bestVirt) + bestVirt = hweight, hintVirt = hint; + } + } + + Hint.clear(); + + // Always prefer the physreg hint. + if (unsigned hint = hintPhys ? hintPhys : hintVirt) { + mri.setRegAllocationHint(li.reg, 0, hint); + // Weakly boost the spill weight of hinted registers. + totalWeight *= 1.01F; + } + + // If the live interval was already unspillable, leave it that way. + if (!Spillable) + return; + + // Mark li as unspillable if all live ranges are tiny. + if (li.isZeroLength(LIS.getSlotIndexes())) { + li.markNotSpillable(); + return; + } + + // If all of the definitions of the interval are re-materializable, + // it is a preferred candidate for spilling. + // FIXME: this gets much more complicated once we support non-trivial + // re-materialization. 
+ if (isRematerializable(li, LIS, VRM, *MF.getSubtarget().getInstrInfo())) + totalWeight *= 0.5F; + + li.weight = normalize(totalWeight, li.getSize(), numInstr); +} diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp new file mode 100644 index 0000000..23c0d54 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -0,0 +1,250 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, + SmallVectorImpl<CCValAssign> &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), MF(mf), + TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Unknown) { + // No stack is used. + StackOffset = 0; + MaxStackArgAlign = 1; + + clearByValRegsInfo(); + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +/// Allocate space on the stack large enough to pass an argument by value. +/// The size and alignment information of the argument is encoded in +/// its parameter attribute. +void CCState::HandleByVal(unsigned ValNo, MVT ValVT, + MVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + MF.getFrameInfo()->ensureMaxAlignment(Align); + MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align); + Size = unsigned(RoundUpToAlignment(Size, MinAlign)); + unsigned Offset = AllocateStack(Size, Align); + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI) + UsedRegs[*AI/32] |= 1 << (*AI&31); +} + +/// Analyze an array of argument values, +/// incorporating info about the formals into this state. 
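HandleByVal earlier in this file is mostly alignment arithmetic: clamp the argument's size and alignment to the declared minimums, round the size up, and carve an aligned slot out of the running stack offset. The standalone sketch below walks one such allocation; roundUpTo and ToyStack are invented for illustration, and real targets may grow the stack differently or hand out registers before falling back to memory.

    // Standalone sketch of the by-value stack allocation arithmetic (plain
    // functions, not CCState): take the larger of the declared and minimum
    // size/alignment, round the size up, and hand out an aligned stack offset.
    #include <iostream>

    static unsigned roundUpTo(unsigned Value, unsigned Align) {
      return (Value + Align - 1) / Align * Align;
    }

    struct ToyStack {
      unsigned Offset = 0;
      unsigned allocate(unsigned Size, unsigned Align) {
        Offset = roundUpTo(Offset, Align);  // start of this argument's slot
        unsigned Start = Offset;
        Offset += Size;                     // reserve the rounded-up size
        return Start;
      }
    };

    int main() {
      ToyStack Stack;
      // A 12-byte struct passed by value, with 16/8 as the ABI minimum size/align:
      unsigned Size = 12, Align = 4, MinSize = 16, MinAlign = 8;
      if (Size < MinSize)
        Size = MinSize;
      if (Align < MinAlign)
        Align = MinAlign;
      Size = roundUpTo(Size, MinAlign);
      std::cout << "placed at offset " << Stack.allocate(Size, Align)
                << ", reserving " << Size << " bytes\n";
    }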
+void +CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } + } +} + +/// Analyze the return values of a function, returning true if the return can +/// be performed without sret-demotion and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + MVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } + } +} + +/// Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } + } +} + +/// Same as above except it takes vectors of types and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs, + SmallVectorImpl<ISD::ArgFlagsTy> &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } + } +} + +/// Analyze the return values of a call, incorporating info about the passed +/// values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + MVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } + } +} + +/// Same as above except it's specialized for calls that produce a single value. 
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << EVT(VT).getEVTString() << '\n'; +#endif + llvm_unreachable(nullptr); + } +} + +static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) { + if (VT.isVector()) + return true; // Assume -msse-regparm might be in effect. + if (!VT.isInteger()) + return false; + if (CC == CallingConv::X86_VectorCall || CC == CallingConv::X86_FastCall) + return true; + return false; +} + +void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs, + MVT VT, CCAssignFn Fn) { + unsigned SavedStackOffset = StackOffset; + unsigned SavedMaxStackArgAlign = MaxStackArgAlign; + unsigned NumLocs = Locs.size(); + + // Set the 'inreg' flag if it is used for this calling convention. + ISD::ArgFlagsTy Flags; + if (isValueTypeInRegForCC(CallingConv, VT)) + Flags.setInReg(); + + // Allocate something of this value type repeatedly until we get assigned a + // location in memory. + bool HaveRegParm = true; + while (HaveRegParm) { + if (Fn(0, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call has unhandled type " << EVT(VT).getEVTString() + << " while computing remaining regparms\n"; +#endif + llvm_unreachable(nullptr); + } + HaveRegParm = Locs.back().isRegLoc(); + } + + // Copy all the registers from the value locations we added. + assert(NumLocs < Locs.size() && "CC assignment failed to add location"); + for (unsigned I = NumLocs, E = Locs.size(); I != E; ++I) + if (Locs[I].isRegLoc()) + Regs.push_back(MCPhysReg(Locs[I].getLocReg())); + + // Clear the assigned values and stack memory. We leave the registers marked + // as allocated so that future queries don't return the same registers, i.e. + // when i64 and f64 are both passed in GPRs. + StackOffset = SavedStackOffset; + MaxStackArgAlign = SavedMaxStackArgAlign; + Locs.resize(NumLocs); +} + +void CCState::analyzeMustTailForwardedRegisters( + SmallVectorImpl<ForwardedRegister> &Forwards, ArrayRef<MVT> RegParmTypes, + CCAssignFn Fn) { + // Oftentimes calling conventions will not user register parameters for + // variadic functions, so we need to assume we're not variadic so that we get + // all the registers that might be used in a non-variadic call. + SaveAndRestore<bool> SavedVarArg(IsVarArg, false); + + for (MVT RegVT : RegParmTypes) { + SmallVector<MCPhysReg, 8> RemainingRegs; + getRemainingRegParmsForType(RemainingRegs, RegVT, Fn); + const TargetLowering *TL = MF.getSubtarget().getTargetLowering(); + const TargetRegisterClass *RC = TL->getRegClassFor(RegVT); + for (MCPhysReg PReg : RemainingRegs) { + unsigned VReg = MF.addLiveIn(PReg, RC); + Forwards.push_back(ForwardedRegister(VReg, PReg, RegVT)); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp new file mode 100644 index 0000000..dc13b5b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -0,0 +1,86 @@ +//===-- CodeGen.cpp -------------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the common initialization routines for the +// CodeGen library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/InitializePasses.h" +#include "llvm-c/Initialization.h" +#include "llvm/PassRegistry.h" + +using namespace llvm; + +/// initializeCodeGen - Initialize all passes linked into the CodeGen library. +void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeAtomicExpandPass(Registry); + initializeBranchFolderPassPass(Registry); + initializeCodeGenPreparePass(Registry); + initializeDeadMachineInstructionElimPass(Registry); + initializeDwarfEHPreparePass(Registry); + initializeEarlyIfConverterPass(Registry); + initializeExpandISelPseudosPass(Registry); + initializeExpandPostRAPass(Registry); + initializeFinalizeMachineBundlesPass(Registry); + initializeFuncletLayoutPass(Registry); + initializeGCMachineCodeAnalysisPass(Registry); + initializeGCModuleInfoPass(Registry); + initializeIfConverterPass(Registry); + initializeLiveDebugVariablesPass(Registry); + initializeLiveIntervalsPass(Registry); + initializeLiveStacksPass(Registry); + initializeLiveVariablesPass(Registry); + initializeLocalStackSlotPassPass(Registry); + initializeLowerIntrinsicsPass(Registry); + initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); + initializeMachineCSEPass(Registry); + initializeImplicitNullChecksPass(Registry); + initializeMachineCombinerPass(Registry); + initializeMachineCopyPropagationPass(Registry); + initializeMachineDominatorTreePass(Registry); + initializeMachineFunctionPrinterPassPass(Registry); + initializeMachineLICMPass(Registry); + initializeMachineLoopInfoPass(Registry); + initializeMachineModuleInfoPass(Registry); + initializeMachinePostDominatorTreePass(Registry); + initializeMachineSchedulerPass(Registry); + initializeMachineSinkingPass(Registry); + initializeMachineVerifierPassPass(Registry); + initializeOptimizePHIsPass(Registry); + initializePEIPass(Registry); + initializePHIEliminationPass(Registry); + initializePeepholeOptimizerPass(Registry); + initializePostMachineSchedulerPass(Registry); + initializePostRASchedulerPass(Registry); + initializeProcessImplicitDefsPass(Registry); + initializeRegisterCoalescerPass(Registry); + initializeShrinkWrapPass(Registry); + initializeSlotIndexesPass(Registry); + initializeStackColoringPass(Registry); + initializeStackMapLivenessPass(Registry); + initializeLiveDebugValuesPass(Registry); + initializeStackProtectorPass(Registry); + initializeStackSlotColoringPass(Registry); + initializeTailDuplicatePassPass(Registry); + initializeTargetPassConfigPass(Registry); + initializeTwoAddressInstructionPassPass(Registry); + initializeUnpackMachineBundlesPass(Registry); + initializeUnreachableBlockElimPass(Registry); + initializeUnreachableMachineBlockElimPass(Registry); + initializeVirtRegMapPass(Registry); + initializeVirtRegRewriterPass(Registry); + initializeWinEHPreparePass(Registry); +} + +void LLVMInitializeCodeGen(LLVMPassRegistryRef R) { + initializeCodeGen(*unwrap(R)); +} diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp new file mode 100644 index 0000000..6fbdea8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -0,0 +1,5572 @@ +//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass munges the code in the input function to better prepare it for +// SelectionDAG-based code generation. This works around limitations in it's +// basic-block-at-a-time approach. It should eventually be removed. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueMap.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/BuildLibCalls.h" +#include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SimplifyLibCalls.h" +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "codegenprepare" + +STATISTIC(NumBlocksElim, "Number of blocks eliminated"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " + "sunken Cmps"); +STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " + "of sunken Casts"); +STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " + "computations were sunk"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumAndsAdded, + "Number of and mask instructions added to form ext loads"); +STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); +STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); +STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); +STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed"); + +static cl::opt<bool> DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); + +static cl::opt<bool> + DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), + cl::desc("Disable GC optimizations in CodeGenPrepare")); + +static cl::opt<bool> DisableSelectToBranch( + "disable-cgp-select2branch", cl::Hidden, cl::init(false), + cl::desc("Disable select to branch conversion.")); + +static cl::opt<bool> AddrSinkUsingGEPs( + 
"addr-sink-using-gep", cl::Hidden, cl::init(false), + cl::desc("Address sinking in CGP using GEPs.")); + +static cl::opt<bool> EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); + +static cl::opt<bool> DisableStoreExtract( + "disable-cgp-store-extract", cl::Hidden, cl::init(false), + cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); + +static cl::opt<bool> StressStoreExtract( + "stress-cgp-store-extract", cl::Hidden, cl::init(false), + cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); + +static cl::opt<bool> DisableExtLdPromotion( + "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), + cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " + "CodeGenPrepare")); + +static cl::opt<bool> StressExtLdPromotion( + "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), + cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " + "optimization in CodeGenPrepare")); + +namespace { +typedef SmallPtrSet<Instruction *, 16> SetOfInstrs; +typedef PointerIntPair<Type *, 1, bool> TypeIsSExt; +typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy; +class TypePromotionTransaction; + + class CodeGenPrepare : public FunctionPass { + const TargetMachine *TM; + const TargetLowering *TLI; + const TargetTransformInfo *TTI; + const TargetLibraryInfo *TLInfo; + + /// As we scan instructions optimizing them, this is the next instruction + /// to optimize. Transforms that can invalidate this should update it. + BasicBlock::iterator CurInstIterator; + + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. + ValueMap<Value*, Value*> SunkAddrs; + + /// Keeps track of all instructions inserted for the current function. + SetOfInstrs InsertedInsts; + /// Keeps track of the type of the related instruction before their + /// promotion for the current function. + InstrToOrigTy PromotedInsts; + + /// True if CFG is modified in any way. + bool ModifiedDT; + + /// True if optimizing for size. + bool OptSize; + + /// DataLayout for the Function being processed. 
+ const DataLayout *DL; + + public: + static char ID; // Pass identification, replacement for typeid + explicit CodeGenPrepare(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM), TLI(nullptr), TTI(nullptr), DL(nullptr) { + initializeCodeGenPreparePass(*PassRegistry::getPassRegistry()); + } + bool runOnFunction(Function &F) override; + + const char *getPassName() const override { return "CodeGen Prepare"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + } + + private: + bool eliminateFallThrough(Function &F); + bool eliminateMostlyEmptyBlocks(Function &F); + bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; + void eliminateMostlyEmptyBlock(BasicBlock *BB); + bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT); + bool optimizeInst(Instruction *I, bool& ModifiedDT); + bool optimizeMemoryInst(Instruction *I, Value *Addr, + Type *AccessTy, unsigned AS); + bool optimizeInlineAsmInst(CallInst *CS); + bool optimizeCallInst(CallInst *CI, bool& ModifiedDT); + bool moveExtToFormExtLoad(Instruction *&I); + bool optimizeExtUses(Instruction *I); + bool optimizeLoadExt(LoadInst *I); + bool optimizeSelectInst(SelectInst *SI); + bool optimizeShuffleVectorInst(ShuffleVectorInst *SI); + bool optimizeSwitchInst(SwitchInst *CI); + bool optimizeExtractElementInst(Instruction *Inst); + bool dupRetToEnableTailCallOpts(BasicBlock *BB); + bool placeDbgValues(Function &F); + bool sinkAndCmp(Function &F); + bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI, + Instruction *&Inst, + const SmallVectorImpl<Instruction *> &Exts, + unsigned CreatedInstCost); + bool splitBranchCondition(Function &F); + bool simplifyOffsetableRelocate(Instruction &I); + void stripInvariantGroupMetadata(Instruction &I); + }; +} + +char CodeGenPrepare::ID = 0; +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) + +FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { + return new CodeGenPrepare(TM); +} + +bool CodeGenPrepare::runOnFunction(Function &F) { + if (skipOptnoneFunction(F)) + return false; + + DL = &F.getParent()->getDataLayout(); + + bool EverMadeChange = false; + // Clear per function information. + InsertedInsts.clear(); + PromotedInsts.clear(); + + ModifiedDT = false; + if (TM) + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + OptSize = F.optForSize(); + + /// This optimization identifies DIV instructions that can be + /// profitably bypassed and carried out with a shorter, faster divide. + if (!OptSize && TLI && TLI->isSlowDivBypassed()) { + const DenseMap<unsigned int, unsigned int> &BypassWidths = + TLI->getBypassSlowDivWidths(); + BasicBlock* BB = &*F.begin(); + while (BB != nullptr) { + // bypassSlowDivision may create new BBs, but we don't want to reapply the + // optimization to those blocks. + BasicBlock* Next = BB->getNextNode(); + EverMadeChange |= bypassSlowDivision(BB, BypassWidths); + BB = Next; + } + } + + // Eliminate blocks that contain only PHI nodes and an + // unconditional branch. + EverMadeChange |= eliminateMostlyEmptyBlocks(F); + + // llvm.dbg.value is far away from the value then iSel may not be able + // handle it properly. 
iSel will drop llvm.dbg.value if it can not + // find a node corresponding to the value. + EverMadeChange |= placeDbgValues(F); + + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) { + EverMadeChange |= sinkAndCmp(F); + EverMadeChange |= splitBranchCondition(F); + } + + bool MadeChange = true; + while (MadeChange) { + MadeChange = false; + for (Function::iterator I = F.begin(); I != F.end(); ) { + BasicBlock *BB = &*I++; + bool ModifiedDTOnIteration = false; + MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration); + + // Restart BB iteration if the dominator tree of the Function was changed + if (ModifiedDTOnIteration) + break; + } + EverMadeChange |= MadeChange; + } + + SunkAddrs.clear(); + + if (!DisableBranchOpts) { + MadeChange = false; + SmallPtrSet<BasicBlock*, 8> WorkList; + for (BasicBlock &BB : F) { + SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB)); + MadeChange |= ConstantFoldTerminator(&BB, true); + if (!MadeChange) continue; + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Delete the dead blocks and any of their dead successors. + MadeChange |= !WorkList.empty(); + while (!WorkList.empty()) { + BasicBlock *BB = *WorkList.begin(); + WorkList.erase(BB); + SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB)); + + DeleteDeadBlock(BB); + + for (SmallVectorImpl<BasicBlock*>::iterator + II = Successors.begin(), IE = Successors.end(); II != IE; ++II) + if (pred_begin(*II) == pred_end(*II)) + WorkList.insert(*II); + } + + // Merge pairs of basic blocks with unconditional branches, connected by + // a single edge. + if (EverMadeChange || MadeChange) + MadeChange |= eliminateFallThrough(F); + + EverMadeChange |= MadeChange; + } + + if (!DisableGCOpts) { + SmallVector<Instruction *, 2> Statepoints; + for (BasicBlock &BB : F) + for (Instruction &I : BB) + if (isStatepoint(I)) + Statepoints.push_back(&I); + for (auto &I : Statepoints) + EverMadeChange |= simplifyOffsetableRelocate(*I); + } + + return EverMadeChange; +} + +/// Merge basic blocks which are connected by a single edge, where one of the +/// basic blocks has a single successor pointing to the other basic block, +/// which has a single predecessor. +bool CodeGenPrepare::eliminateFallThrough(Function &F) { + bool Changed = false; + // Scan all of the blocks in the function, except for the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = &*I++; + // If the destination block has a single pred, then this is a trivial + // edge, just collapse it. + BasicBlock *SinglePred = BB->getSinglePredecessor(); + + // Don't merge if BB's address is taken. + if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue; + + BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator()); + if (Term && !Term->isConditional()) { + Changed = true; + DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n"); + // Remember if SinglePred was the entry block of the function. + // If so, we will need to move BB back to the entry position. 
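+ // (A brief sketch of why: MergeBasicBlockIntoOnlyPred splices SinglePred's
+ // instructions into BB and erases SinglePred, but it leaves BB at its
+ // current position in the function's block list, so if SinglePred was the
+ // entry block, BB has to be moved to the front to remain a valid entry.)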
+ bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(BB, nullptr); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + // We have erased a block. Update the iterator. + I = BB->getIterator(); + } + } + return Changed; +} + +/// Eliminate blocks that contain only PHI nodes, debug info directives, and an +/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split +/// edges in ways that are non-optimal for isel. Start by eliminating these +/// blocks so we can split them the way we want them. +bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { + bool MadeChange = false; + // Note that this intentionally skips the entry block. + for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) { + BasicBlock *BB = &*I++; + + // If this block doesn't end with an uncond branch, ignore it. + BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); + if (!BI || !BI->isUnconditional()) + continue; + + // If the instruction before the branch (skipping debug info) isn't a phi + // node, then other stuff is happening here. + BasicBlock::iterator BBI = BI->getIterator(); + if (BBI != BB->begin()) { + --BBI; + while (isa<DbgInfoIntrinsic>(BBI)) { + if (BBI == BB->begin()) + break; + --BBI; + } + if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) + continue; + } + + // Do not break infinite loops. + BasicBlock *DestBB = BI->getSuccessor(0); + if (DestBB == BB) + continue; + + if (!canMergeBlocks(BB, DestBB)) + continue; + + eliminateMostlyEmptyBlock(BB); + MadeChange = true; + } + return MadeChange; +} + +/// Return true if we can merge BB into DestBB if there is a single +/// unconditional branch between them, and BB contains no other non-phi +/// instructions. +bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB, + const BasicBlock *DestBB) const { + // We only want to eliminate blocks whose phi nodes are used by phi nodes in + // the successor. If there are more complex condition (e.g. preheaders), + // don't mess around with them. + BasicBlock::const_iterator BBI = BB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + for (const User *U : PN->users()) { + const Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != DestBB || !isa<PHINode>(UI)) + return false; + // If User is inside DestBB block and it is a PHINode then check + // incoming value. If incoming value is not from BB then this is + // a complex condition (e.g. preheaders) we want to avoid here. + if (UI->getParent() == DestBB) { + if (const PHINode *UPN = dyn_cast<PHINode>(UI)) + for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) { + Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I)); + if (Insn && Insn->getParent() == BB && + Insn->getParent() != UPN->getIncomingBlock(I)) + return false; + } + } + } + } + + // If BB and DestBB contain any common predecessors, then the phi nodes in BB + // and DestBB may have conflicting incoming values for the block. If so, we + // can't merge the block. + const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin()); + if (!DestBBPN) return true; // no conflict. + + // Collect the preds of BB. + SmallPtrSet<const BasicBlock*, 16> BBPreds; + if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + // It is faster to get preds from a PHI than with pred_iterator. 
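+ // (Sketch of the conflict being checked for below: if some block P is a
+ // predecessor of both BB and DestBB, a PHI in DestBB has one incoming value
+ // for the edge P->DestBB and another for the edge BB->DestBB; after the
+ // merge both edges come from P, so merging is only safe if those two values
+ // are identical.)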
+ for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + BBPreds.insert(BBPN->getIncomingBlock(i)); + } else { + BBPreds.insert(pred_begin(BB), pred_end(BB)); + } + + // Walk the preds of DestBB. + for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) { + BasicBlock *Pred = DestBBPN->getIncomingBlock(i); + if (BBPreds.count(Pred)) { // Common predecessor? + BBI = DestBB->begin(); + while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) { + const Value *V1 = PN->getIncomingValueForBlock(Pred); + const Value *V2 = PN->getIncomingValueForBlock(BB); + + // If V2 is a phi node in BB, look up what the mapped value will be. + if (const PHINode *V2PN = dyn_cast<PHINode>(V2)) + if (V2PN->getParent() == BB) + V2 = V2PN->getIncomingValueForBlock(Pred); + + // If there is a conflict, bail out. + if (V1 != V2) return false; + } + } + } + + return true; +} + + +/// Eliminate a basic block that has only phi's and an unconditional branch in +/// it. +void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) { + BranchInst *BI = cast<BranchInst>(BB->getTerminator()); + BasicBlock *DestBB = BI->getSuccessor(0); + + DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB); + + // If the destination block has a single pred, then this is a trivial edge, + // just collapse it. + if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) { + if (SinglePred != DestBB) { + // Remember if SinglePred was the entry block of the function. If so, we + // will need to move BB back to the entry position. + bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock(); + MergeBasicBlockIntoOnlyPred(DestBB, nullptr); + + if (isEntry && BB != &BB->getParent()->getEntryBlock()) + BB->moveBefore(&BB->getParent()->getEntryBlock()); + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); + return; + } + } + + // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB + // to handle the new incoming edges it is about to have. + PHINode *PN; + for (BasicBlock::iterator BBI = DestBB->begin(); + (PN = dyn_cast<PHINode>(BBI)); ++BBI) { + // Remove the incoming value for BB, and remember it. + Value *InVal = PN->removeIncomingValue(BB, false); + + // Two options: either the InVal is a phi node defined in BB or it is some + // value that dominates BB. + PHINode *InValPhi = dyn_cast<PHINode>(InVal); + if (InValPhi && InValPhi->getParent() == BB) { + // Add all of the input values of the input PHI as inputs of this phi. + for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InValPhi->getIncomingValue(i), + InValPhi->getIncomingBlock(i)); + } else { + // Otherwise, add one instance of the dominating value for each edge that + // we will be adding. + if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) { + for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i) + PN->addIncoming(InVal, BBPN->getIncomingBlock(i)); + } else { + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + PN->addIncoming(InVal, *PI); + } + } + } + + // The PHIs are now updated, change everything that refers to BB to use + // DestBB and remove BB. 
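+ // (The uses being rewritten here are uses of BB as a value, most notably
+ // the branch target operands in BB's predecessors, which afterwards jump
+ // directly to DestBB.)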
+ BB->replaceAllUsesWith(DestBB); + BB->eraseFromParent(); + ++NumBlocksElim; + + DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); +} + +// Computes a map of base pointer relocation instructions to corresponding +// derived pointer relocation instructions given a vector of all relocate calls +static void computeBaseDerivedRelocateMap( + const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls, + DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> + &RelocateInstMap) { + // Collect information in two maps: one primarily for locating the base object + // while filling the second map; the second map is the final structure holding + // a mapping between Base and corresponding Derived relocate calls + DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap; + for (auto *ThisRelocate : AllRelocateCalls) { + auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), + ThisRelocate->getDerivedPtrIndex()); + RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); + } + for (auto &Item : RelocateIdxMap) { + std::pair<unsigned, unsigned> Key = Item.first; + if (Key.first == Key.second) + // Base relocation: nothing to insert + continue; + + GCRelocateInst *I = Item.second; + auto BaseKey = std::make_pair(Key.first, Key.first); + + // We're iterating over RelocateIdxMap so we cannot modify it. + auto MaybeBase = RelocateIdxMap.find(BaseKey); + if (MaybeBase == RelocateIdxMap.end()) + // TODO: We might want to insert a new base object relocate and gep off + // that, if there are enough derived object relocates. + continue; + + RelocateInstMap[MaybeBase->second].push_back(I); + } +} + +// Accepts a GEP and extracts the operands into a vector provided they're all +// small integer constants +static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, + SmallVectorImpl<Value *> &OffsetV) { + for (unsigned i = 1; i < GEP->getNumOperands(); i++) { + // Only accept small constant integer operands + auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); + if (!Op || Op->getZExtValue() > 20) + return false; + } + + for (unsigned i = 1; i < GEP->getNumOperands(); i++) + OffsetV.push_back(GEP->getOperand(i)); + return true; +} + +// Takes a RelocatedBase (base pointer relocation instruction) and Targets to +// replace, computes a replacement, and affects it. +static bool +simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, + const SmallVectorImpl<GCRelocateInst *> &Targets) { + bool MadeChange = false; + for (GCRelocateInst *ToReplace : Targets) { + assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && + "Not relocating a derived object of the original base object"); + if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { + // A duplicate relocate call. TODO: coalesce duplicates. + continue; + } + + if (RelocatedBase->getParent() != ToReplace->getParent()) { + // Base and derived relocates are in different basic blocks. + // In this case transform is only valid when base dominates derived + // relocate. However it would be too expensive to check dominance + // for each such relocate, so we skip the whole transformation. 
+ continue; + } + + Value *Base = ToReplace->getBasePtr(); + auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); + if (!Derived || Derived->getPointerOperand() != Base) + continue; + + SmallVector<Value *, 2> OffsetV; + if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) + continue; + + // Create a Builder and replace the target callsite with a gep + assert(RelocatedBase->getNextNode() && "Should always have one since it's not a terminator"); + + // Insert after RelocatedBase + IRBuilder<> Builder(RelocatedBase->getNextNode()); + Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); + + // If gc_relocate does not match the actual type, cast it to the right type. + // In theory, there must be a bitcast after gc_relocate if the type does not + // match, and we should reuse it to get the derived pointer. But it could be + // cases like this: + // bb1: + // ... + // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // bb2: + // ... + // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) + // br label %merge + // + // merge: + // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] + // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* + // + // In this case, we can not find the bitcast any more. So we insert a new bitcast + // no matter there is already one or not. In this way, we can handle all cases, and + // the extra bitcast should be optimized away in later passes. + Value *ActualRelocatedBase = RelocatedBase; + if (RelocatedBase->getType() != Base->getType()) { + ActualRelocatedBase = + Builder.CreateBitCast(RelocatedBase, Base->getType()); + } + Value *Replacement = Builder.CreateGEP( + Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); + Replacement->takeName(ToReplace); + // If the newly generated derived pointer's type does not match the original derived + // pointer's type, cast the new derived pointer to match it. Same reasoning as above. + Value *ActualReplacement = Replacement; + if (Replacement->getType() != ToReplace->getType()) { + ActualReplacement = + Builder.CreateBitCast(Replacement, ToReplace->getType()); + } + ToReplace->replaceAllUsesWith(ActualReplacement); + ToReplace->eraseFromParent(); + + MadeChange = true; + } + return MadeChange; +} + +// Turns this: +// +// %base = ... +// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = relocate(%tok, i32 4, i32 4) +// %ptr' = relocate(%tok, i32 4, i32 5) +// %val = load %ptr' +// +// into this: +// +// %base = ... 
+// %ptr = gep %base + 15 +// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) +// %base' = gc.relocate(%tok, i32 4, i32 4) +// %ptr' = gep %base' + 15 +// %val = load %ptr' +bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { + bool MadeChange = false; + SmallVector<GCRelocateInst *, 2> AllRelocateCalls; + + for (auto *U : I.users()) + if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) + // Collect all the relocate calls associated with a statepoint + AllRelocateCalls.push_back(Relocate); + + // We need atleast one base pointer relocation + one derived pointer + // relocation to mangle + if (AllRelocateCalls.size() < 2) + return false; + + // RelocateInstMap is a mapping from the base relocate instruction to the + // corresponding derived relocate instructions + DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; + computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); + if (RelocateInstMap.empty()) + return false; + + for (auto &Item : RelocateInstMap) + // Item.first is the RelocatedBase to offset against + // Item.second is the vector of Targets to replace + MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); + return MadeChange; +} + +/// SinkCast - Sink the specified cast instruction into its user blocks +static bool SinkCast(CastInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// InsertedCasts - Only insert a cast in each block once. + DenseMap<BasicBlock*, CastInst*> InsertedCasts; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Figure out which BB this cast is used in. For PHI's this is the + // appropriate predecessor block. + BasicBlock *UserBB = User->getParent(); + if (PHINode *PN = dyn_cast<PHINode>(User)) { + UserBB = PN->getIncomingBlock(TheUse); + } + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // If the block selected to receive the cast is an EH pad that does not + // allow non-PHI instructions before the terminator, we can't sink the + // cast. + if (UserBB->getTerminator()->isEHPad()) + continue; + + // If this user is in the same block as the cast, don't change the cast. + if (UserBB == DefBB) continue; + + // If we have already inserted a cast into this block, use it. + CastInst *&InsertedCast = InsertedCasts[UserBB]; + + if (!InsertedCast) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); + InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), + CI->getType(), "", &*InsertPt); + } + + // Replace a use of the cast with a use of the new cast. + TheUse = InsertedCast; + MadeChange = true; + ++NumCastUses; + } + + // If we removed all uses, nuke the cast. + if (CI->use_empty()) { + CI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +/// If the specified cast instruction is a noop copy (e.g. it's casting from +/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to +/// reduce the number of virtual registers that must be created and coalesced. +/// +/// Return true if any changes are made. +/// +static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, + const DataLayout &DL) { + // If this is a noop copy, + EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, CI->getType()); + + // This is an fp<->int conversion? 
+ if (SrcVT.isInteger() != DstVT.isInteger()) + return false; + + // If this is an extension, it will be a zero or sign extension, which + // isn't a noop. + if (SrcVT.bitsLT(DstVT)) return false; + + // If these values will be promoted, find out what they will be promoted + // to. This helps us consider truncates on PPC as noop copies when they + // are. + if (TLI.getTypeAction(CI->getContext(), SrcVT) == + TargetLowering::TypePromoteInteger) + SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); + if (TLI.getTypeAction(CI->getContext(), DstVT) == + TargetLowering::TypePromoteInteger) + DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); + + // If, after promotion, these are the same types, this is a noop copy. + if (SrcVT != DstVT) + return false; + + return SinkCast(CI); +} + +/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if +/// possible. +/// +/// Return true if any changes were made. +static bool CombineUAddWithOverflow(CmpInst *CI) { + Value *A, *B; + Instruction *AddI; + if (!match(CI, + m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI)))) + return false; + + Type *Ty = AddI->getType(); + if (!isa<IntegerType>(Ty)) + return false; + + // We don't want to move around uses of condition values this late, so we we + // check if it is legal to create the call to the intrinsic in the basic + // block containing the icmp: + + if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse()) + return false; + +#ifndef NDEBUG + // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption + // for now: + if (AddI->hasOneUse()) + assert(*AddI->user_begin() == CI && "expected!"); +#endif + + Module *M = CI->getModule(); + Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty); + + auto *InsertPt = AddI->hasOneUse() ? CI : AddI; + + auto *UAddWithOverflow = + CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt); + auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt); + auto *Overflow = + ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt); + + CI->replaceAllUsesWith(Overflow); + AddI->replaceAllUsesWith(UAdd); + CI->eraseFromParent(); + AddI->eraseFromParent(); + return true; +} + +/// Sink the given CmpInst into user blocks to reduce the number of virtual +/// registers that must be created and coalesced. This is a clear win except on +/// targets with multiple condition code registers (PowerPC), where it might +/// lose; some adjustment may be wanted there. +/// +/// Return true if any changes are made. +static bool SinkCmpExpression(CmpInst *CI) { + BasicBlock *DefBB = CI->getParent(); + + /// Only insert a cmp in each block once. + DenseMap<BasicBlock*, CmpInst*> InsertedCmps; + + bool MadeChange = false; + for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); + UI != E; ) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa<PHINode>(User)) + continue; + + // Figure out which BB this cmp is used in. + BasicBlock *UserBB = User->getParent(); + + // If this user is in the same block as the cmp, don't change the cmp. + if (UserBB == DefBB) continue; + + // If we have already inserted a cmp into this block, use it. 
+ CmpInst *&InsertedCmp = InsertedCmps[UserBB]; + + if (!InsertedCmp) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); + InsertedCmp = + CmpInst::Create(CI->getOpcode(), CI->getPredicate(), + CI->getOperand(0), CI->getOperand(1), "", &*InsertPt); + } + + // Replace a use of the cmp with a use of the new cmp. + TheUse = InsertedCmp; + MadeChange = true; + ++NumCmpUses; + } + + // If we removed all uses, nuke the cmp. + if (CI->use_empty()) { + CI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +static bool OptimizeCmpExpression(CmpInst *CI) { + if (SinkCmpExpression(CI)) + return true; + + if (CombineUAddWithOverflow(CI)) + return true; + + return false; +} + +/// Check if the candidates could be combined with a shift instruction, which +/// includes: +/// 1. Truncate instruction +/// 2. And instruction and the imm is a mask of the low bits: +/// imm & (imm+1) == 0 +static bool isExtractBitsCandidateUse(Instruction *User) { + if (!isa<TruncInst>(User)) { + if (User->getOpcode() != Instruction::And || + !isa<ConstantInt>(User->getOperand(1))) + return false; + + const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); + + if ((Cimm & (Cimm + 1)).getBoolValue()) + return false; + } + return true; +} + +/// Sink both shift and truncate instruction to the use of truncate's BB. +static bool +SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, + DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, + const TargetLowering &TLI, const DataLayout &DL) { + BasicBlock *UserBB = User->getParent(); + DenseMap<BasicBlock *, CastInst *> InsertedTruncs; + TruncInst *TruncI = dyn_cast<TruncInst>(User); + bool MadeChange = false; + + for (Value::user_iterator TruncUI = TruncI->user_begin(), + TruncE = TruncI->user_end(); + TruncUI != TruncE;) { + + Use &TruncTheUse = TruncUI.getUse(); + Instruction *TruncUser = cast<Instruction>(*TruncUI); + // Preincrement use iterator so we don't invalidate it. + + ++TruncUI; + + int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); + if (!ISDOpcode) + continue; + + // If the use is actually a legal node, there will not be an + // implicit truncate. + // FIXME: always querying the result type is just an + // approximation; some nodes' legality is determined by the + // operand or other means. There's no good way to find out though. + if (TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) + continue; + + // Don't bother for PHI nodes. 
+ if (isa<PHINode>(TruncUser)) + continue; + + BasicBlock *TruncUserBB = TruncUser->getParent(); + + if (UserBB == TruncUserBB) + continue; + + BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; + CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; + + if (!InsertedShift && !InsertedTrunc) { + BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); + assert(InsertPt != TruncUserBB->end()); + // Sink the shift + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); + else + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); + + // Sink the trunc + BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); + TruncInsertPt++; + assert(TruncInsertPt != TruncUserBB->end()); + + InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, + TruncI->getType(), "", &*TruncInsertPt); + + MadeChange = true; + + TruncTheUse = InsertedTrunc; + } + } + return MadeChange; +} + +/// Sink the shift *right* instruction into user blocks if the uses could +/// potentially be combined with this shift instruction and generate BitExtract +/// instruction. It will only be applied if the architecture supports BitExtract +/// instruction. Here is an example: +/// BB1: +/// %x.extract.shift = lshr i64 %arg1, 32 +/// BB2: +/// %x.extract.trunc = trunc i64 %x.extract.shift to i16 +/// ==> +/// +/// BB2: +/// %x.extract.shift.1 = lshr i64 %arg1, 32 +/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16 +/// +/// CodeGen will recoginze the pattern in BB2 and generate BitExtract +/// instruction. +/// Return true if any changes are made. +static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, + const TargetLowering &TLI, + const DataLayout &DL) { + BasicBlock *DefBB = ShiftI->getParent(); + + /// Only insert instructions in each block once. + DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts; + + bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType())); + + bool MadeChange = false; + for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end(); + UI != E;) { + Use &TheUse = UI.getUse(); + Instruction *User = cast<Instruction>(*UI); + // Preincrement use iterator so we don't invalidate it. + ++UI; + + // Don't bother for PHI nodes. + if (isa<PHINode>(User)) + continue; + + if (!isExtractBitsCandidateUse(User)) + continue; + + BasicBlock *UserBB = User->getParent(); + + if (UserBB == DefBB) { + // If the shift and truncate instruction are in the same BB. The use of + // the truncate(TruncUse) may still introduce another truncate if not + // legal. In this case, we would like to sink both shift and truncate + // instruction to the BB of TruncUse. + // for example: + // BB1: + // i64 shift.result = lshr i64 opnd, imm + // trunc.result = trunc shift.result to i16 + // + // BB2: + // ----> We will have an implicit truncate here if the architecture does + // not have i16 compare. + // cmp i16 trunc.result, opnd2 + // + if (isa<TruncInst>(User) && shiftIsLegal + // If the type of the truncate is legal, no trucate will be + // introduced in other basic blocks. + && + (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType())))) + MadeChange = + SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL); + + continue; + } + // If we have already inserted a shift into this block, use it. 
+ BinaryOperator *&InsertedShift = InsertedShifts[UserBB]; + + if (!InsertedShift) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); + + if (ShiftI->getOpcode() == Instruction::AShr) + InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); + else + InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, + "", &*InsertPt); + + MadeChange = true; + } + + // Replace a use of the shift with a use of the new shift. + TheUse = InsertedShift; + } + + // If we removed all uses, nuke the shift. + if (ShiftI->use_empty()) + ShiftI->eraseFromParent(); + + return MadeChange; +} + +// Translate a masked load intrinsic like +// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align, +// <16 x i1> %mask, <16 x i32> %passthru) +// to a chain of basic blocks, with loading element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.load, label %else +// +//cond.load: ; preds = %0 +// %4 = getelementptr i32* %1, i32 0 +// %5 = load i32* %4 +// %6 = insertelement <16 x i32> undef, i32 %5, i32 0 +// br label %else +// +//else: ; preds = %0, %cond.load +// %res.phi.else = phi <16 x i32> [ %6, %cond.load ], [ undef, %0 ] +// %7 = extractelement <16 x i1> %mask, i32 1 +// %8 = icmp eq i1 %7, true +// br i1 %8, label %cond.load1, label %else2 +// +//cond.load1: ; preds = %else +// %9 = getelementptr i32* %1, i32 1 +// %10 = load i32* %9 +// %11 = insertelement <16 x i32> %res.phi.else, i32 %10, i32 1 +// br label %else2 +// +//else2: ; preds = %else, %cond.load1 +// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] +// %12 = extractelement <16 x i1> %mask, i32 2 +// %13 = icmp eq i1 %12, true +// br i1 %13, label %cond.load4, label %else5 +// +static void ScalarizeMaskedLoad(CallInst *CI) { + Value *Ptr = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + assert(VecType && "Unexpected return type of masked load intrinsic"); + + Type *EltTy = CI->getType()->getVectorElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + bool IsAllOnesMask = isa<Constant>(Mask) && + cast<Constant>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. 
+ AlignVal = std::min(AlignVal, VecType->getScalarSizeInBits()/8); + // Bitcast %addr fron i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst* Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ] + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_load = icmp eq i1 %mask_1, true + // br i1 %to_load, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked store intrinsic, like +// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align, +// <16 x i1> %mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set +// +// %1 = bitcast i8* %addr to i32* +// %2 = extractelement <16 x i1> %mask, i32 0 +// %3 = icmp eq i1 %2, true +// br i1 %3, label %cond.store, label %else +// +// cond.store: ; preds = %0 +// %4 = extractelement <16 x i32> %val, i32 0 +// %5 = getelementptr i32* %1, i32 0 +// store i32 %4, i32* %5 +// br label %else +// +// else: ; preds = %0, %cond.store +// %6 = extractelement <16 x i1> %mask, i32 
1 +// %7 = icmp eq i1 %6, true +// br i1 %7, label %cond.store1, label %else2 +// +// cond.store1: ; preds = %else +// %8 = extractelement <16 x i32> %val, i32 1 +// %9 = getelementptr i32* %1, i32 1 +// store i32 %8, i32* %9 +// br label %else2 +// . . . +static void ScalarizeMaskedStore(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptr = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + VectorType *VecType = dyn_cast<VectorType>(Src->getType()); + assert(VecType && "Unexpected data type in masked store intrinsic"); + + Type *EltTy = VecType->getElementType(); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + // Short-cut if the mask is all-true. + bool IsAllOnesMask = isa<Constant>(Mask) && + cast<Constant>(Mask)->isAllOnesValue(); + + if (IsAllOnesMask) { + Builder.CreateAlignedStore(Src, Ptr, AlignVal); + CI->eraseFromParent(); + return; + } + + // Adjust alignment for the scalar instruction. + AlignVal = std::max(AlignVal, VecType->getScalarSizeInBits()/8); + // Bitcast %addr fron i8* to EltTy* + Type *NewPtrType = + EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace()); + Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType); + unsigned VectorWidth = VecType->getNumElements(); + + if (isa<ConstantVector>(Mask)) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + } + CI->eraseFromParent(); + return; + } + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %mask_1 = extractelement <16 x i1> %mask, i32 Idx + // %to_store = icmp eq i1 %mask_1, true + // br i1 %to_store, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1)); + + // Create "cond" block + // + // %OneElt = extractelement <16 x i32> %Src, i32 Idx + // %EltAddr = getelementptr i32* %1, i32 0 + // %store i32 %OneElt, i32* %EltAddr + // + BasicBlock *CondBlock = + IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx)); + Value *Gep = + Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx)); + Builder.CreateAlignedStore(OneElt, Gep, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = + CondBlock->splitBasicBlock(InsertPt->getIterator(), "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +// Translate a masked gather intrinsic like +// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4, +// <16 x i1> %Mask, <16 x i32> %Src) +// to a chain of basic blocks, with loading element one-by-one if +// the 
appropriate mask bit is set +// +// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> %Mask, i32 0 +// % ToLoad0 = icmp eq i1 % Mask0, true +// br i1 % ToLoad0, label %cond.load, label %else +// +// cond.load: +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// % Load0 = load i32, i32* % Ptr0, align 4 +// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0 +// br label %else +// +// else: +// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0] +// % Mask1 = extractelement <16 x i1> %Mask, i32 1 +// % ToLoad1 = icmp eq i1 % Mask1, true +// br i1 % ToLoad1, label %cond.load1, label %else2 +// +// cond.load1: +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// % Load1 = load i32, i32* % Ptr1, align 4 +// % Res1 = insertelement <16 x i32> %res.phi.else, i32 % Load1, i32 1 +// br label %else2 +// . . . +// % Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src +// ret <16 x i32> %Result +static void ScalarizeMaskedGather(CallInst *CI) { + Value *Ptrs = CI->getArgOperand(0); + Value *Alignment = CI->getArgOperand(1); + Value *Mask = CI->getArgOperand(2); + Value *Src0 = CI->getArgOperand(3); + + VectorType *VecType = dyn_cast<VectorType>(CI->getType()); + + assert(VecType && "Unexpected return type of masked load intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + BasicBlock *CondBlock = nullptr; + BasicBlock *PrevIfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + Value *UndefVal = UndefValue::get(VecType); + + // The result vector + Value *VResult = UndefVal; + unsigned VectorWidth = VecType->getNumElements(); + + // Shorten the way if the mask is a vector of constants. 
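+ // (With a compile-time-constant mask no control flow is needed: only the
+ // enabled lanes get a load. For example, a <4 x i1> mask of <1, 0, 1, 0>
+ // loads just lanes 0 and 2, and the final select takes the remaining lanes
+ // from the pass-through operand Src0.)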
+ bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, + Builder.getInt32(Idx), + "Res" + Twine(Idx)); + } + Value *NewI = Builder.CreateSelect(Mask, VResult, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); + return; + } + + PHINode *Phi = nullptr; + Value *PrevPhi = UndefVal; + + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + + // Fill the "else" block, created in the previous iteration + // + // %Mask1 = extractelement <16 x i1> %Mask, i32 1 + // %ToLoad1 = icmp eq i1 %Mask1, true + // br i1 %ToLoad1, label %cond.load, label %else + // + if (Idx > 0) { + Phi = Builder.CreatePHI(VecType, 2, "res.phi.else"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + PrevPhi = Phi; + VResult = Phi; + } + + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToLoad" + Twine(Idx)); + + // Create "cond" block + // + // %EltAddr = getelementptr i32* %1, i32 0 + // %Elt = load i32* %EltAddr + // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx + // + CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load"); + Builder.SetInsertPoint(InsertPt); + + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + LoadInst *Load = Builder.CreateAlignedLoad(Ptr, AlignVal, + "Load" + Twine(Idx)); + VResult = Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx), + "Res" + Twine(Idx)); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + PrevIfBlock = IfBlock; + IfBlock = NewIfBlock; + } + + Phi = Builder.CreatePHI(VecType, 2, "res.phi.select"); + Phi->addIncoming(VResult, CondBlock); + Phi->addIncoming(PrevPhi, PrevIfBlock); + Value *NewI = Builder.CreateSelect(Mask, Phi, Src0); + CI->replaceAllUsesWith(NewI); + CI->eraseFromParent(); +} + +// Translate a masked scatter intrinsic, like +// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*>* %Ptrs, i32 4, +// <16 x i1> %Mask) +// to a chain of basic blocks, that stores element one-by-one if +// the appropriate mask bit is set. 
+// +// % Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind +// % Mask0 = extractelement <16 x i1> % Mask, i32 0 +// % ToStore0 = icmp eq i1 % Mask0, true +// br i1 %ToStore0, label %cond.store, label %else +// +// cond.store: +// % Elt0 = extractelement <16 x i32> %Src, i32 0 +// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0 +// store i32 %Elt0, i32* % Ptr0, align 4 +// br label %else +// +// else: +// % Mask1 = extractelement <16 x i1> % Mask, i32 1 +// % ToStore1 = icmp eq i1 % Mask1, true +// br i1 % ToStore1, label %cond.store1, label %else2 +// +// cond.store1: +// % Elt1 = extractelement <16 x i32> %Src, i32 1 +// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 +// store i32 % Elt1, i32* % Ptr1, align 4 +// br label %else2 +// . . . +static void ScalarizeMaskedScatter(CallInst *CI) { + Value *Src = CI->getArgOperand(0); + Value *Ptrs = CI->getArgOperand(1); + Value *Alignment = CI->getArgOperand(2); + Value *Mask = CI->getArgOperand(3); + + assert(isa<VectorType>(Src->getType()) && + "Unexpected data type in masked scatter intrinsic"); + assert(isa<VectorType>(Ptrs->getType()) && + isa<PointerType>(Ptrs->getType()->getVectorElementType()) && + "Vector of pointers is expected in masked scatter intrinsic"); + + IRBuilder<> Builder(CI->getContext()); + Instruction *InsertPt = CI; + BasicBlock *IfBlock = CI->getParent(); + Builder.SetInsertPoint(InsertPt); + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + + unsigned AlignVal = cast<ConstantInt>(Alignment)->getZExtValue(); + unsigned VectorWidth = Src->getType()->getVectorNumElements(); + + // Shorten the way if the mask is a vector of constants. + bool IsConstMask = isa<ConstantVector>(Mask); + + if (IsConstMask) { + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + if (cast<ConstantVector>(Mask)->getOperand(Idx)->isNullValue()) + continue; + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + } + CI->eraseFromParent(); + return; + } + for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) { + // Fill the "else" block, created in the previous iteration + // + // % Mask1 = extractelement <16 x i1> % Mask, i32 Idx + // % ToStore = icmp eq i1 % Mask1, true + // br i1 % ToStore, label %cond.store, label %else + // + Value *Predicate = Builder.CreateExtractElement(Mask, + Builder.getInt32(Idx), + "Mask" + Twine(Idx)); + Value *Cmp = + Builder.CreateICmp(ICmpInst::ICMP_EQ, Predicate, + ConstantInt::get(Predicate->getType(), 1), + "ToStore" + Twine(Idx)); + + // Create "cond" block + // + // % Elt1 = extractelement <16 x i32> %Src, i32 1 + // % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1 + // %store i32 % Elt1, i32* % Ptr1 + // + BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store"); + Builder.SetInsertPoint(InsertPt); + + Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx), + "Elt" + Twine(Idx)); + Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx), + "Ptr" + Twine(Idx)); + Builder.CreateAlignedStore(OneElt, Ptr, AlignVal); + + // Create "else" block, fill it in the next iteration + BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else"); + Builder.SetInsertPoint(InsertPt); + Instruction *OldBr = IfBlock->getTerminator(); + BranchInst::Create(CondBlock, NewIfBlock, Cmp, OldBr); + OldBr->eraseFromParent(); + IfBlock = NewIfBlock; + } + CI->eraseFromParent(); +} + +/// 
If counting leading or trailing zeros is an expensive operation and a zero +/// input is defined, add a check for zero to avoid calling the intrinsic. +/// +/// We want to transform: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) +/// +/// into: +/// entry: +/// %cmpz = icmp eq i64 %A, 0 +/// br i1 %cmpz, label %cond.end, label %cond.false +/// cond.false: +/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) +/// br label %cond.end +/// cond.end: +/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] +/// +/// If the transform is performed, return true and set ModifiedDT to true. +static bool despeculateCountZeros(IntrinsicInst *CountZeros, + const TargetLowering *TLI, + const DataLayout *DL, + bool &ModifiedDT) { + if (!TLI || !DL) + return false; + + // If a zero input is undefined, it doesn't make sense to despeculate that. + if (match(CountZeros->getOperand(1), m_One())) + return false; + + // If it's cheap to speculate, there's nothing to do. + auto IntrinsicID = CountZeros->getIntrinsicID(); + if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || + (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) + return false; + + // Only handle legal scalar cases. Anything else requires too much work. + Type *Ty = CountZeros->getType(); + unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); + if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSize()) + return false; + + // The intrinsic will be sunk behind a compare against zero and branch. + BasicBlock *StartBlock = CountZeros->getParent(); + BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); + + // Create another block after the count zero intrinsic. A PHI will be added + // in this block to select the result of the intrinsic or the bit-width + // constant if the input to the intrinsic is zero. + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); + BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); + + // Set up a builder to create a compare, conditional branch, and PHI. + IRBuilder<> Builder(CountZeros->getContext()); + Builder.SetInsertPoint(StartBlock->getTerminator()); + Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); + + // Replace the unconditional branch that was created by the first split with + // a compare against zero and a conditional branch. + Value *Zero = Constant::getNullValue(Ty); + Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); + Builder.CreateCondBr(Cmp, EndBlock, CallBlock); + StartBlock->getTerminator()->eraseFromParent(); + + // Create a PHI in the end block to select either the output of the intrinsic + // or the bit width of the operand. + Builder.SetInsertPoint(&EndBlock->front()); + PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); + CountZeros->replaceAllUsesWith(PN); + Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); + PN->addIncoming(BitWidth, StartBlock); + PN->addIncoming(CountZeros, CallBlock); + + // We are explicitly handling the zero case, so we can set the intrinsic's + // undefined zero argument to 'true'. This will also prevent reprocessing the + // intrinsic; we only despeculate when a zero input is defined. + CountZeros->setArgOperand(1, Builder.getTrue()); + ModifiedDT = true; + return true; +} + +bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) { + BasicBlock *BB = CI->getParent(); + + // Lower inline assembly if we can. 
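+  // As a rough illustration (not tied to any particular target), a target's
+  // ExpandInlineAsm hook may recognize a simple asm idiom and rewrite it as an
+  // equivalent intrinsic that later passes understand, e.g. turning
+  //   %r = call i32 asm "bswap $0", "=r,0"(i32 %x)
+  // into
+  //   %r = call i32 @llvm.bswap.i32(i32 %x)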
+ // If we found an inline asm expession, and if the target knows how to + // lower it to normal LLVM code, do so now. + if (TLI && isa<InlineAsm>(CI->getCalledValue())) { + if (TLI->ExpandInlineAsm(CI)) { + // Avoid invalidating the iterator. + CurInstIterator = BB->begin(); + // Avoid processing instructions out of order, which could cause + // reuse before a value is defined. + SunkAddrs.clear(); + return true; + } + // Sink address computing for memory operands into the block. + if (optimizeInlineAsmInst(CI)) + return true; + } + + // Align the pointer arguments to this call if the target thinks it's a good + // idea + unsigned MinSize, PrefAlign; + if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { + for (auto &Arg : CI->arg_operands()) { + // We want to align both objects whose address is used directly and + // objects whose address is used in casts and GEPs, though it only makes + // sense for GEPs if the offset is a multiple of the desired alignment and + // if size - offset meets the size threshold. + if (!Arg->getType()->isPointerTy()) + continue; + APInt Offset(DL->getPointerSizeInBits( + cast<PointerType>(Arg->getType())->getAddressSpace()), + 0); + Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); + uint64_t Offset2 = Offset.getLimitedValue(); + if ((Offset2 & (PrefAlign-1)) != 0) + continue; + AllocaInst *AI; + if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && + DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) + AI->setAlignment(PrefAlign); + // Global variables can only be aligned if they are defined in this + // object (i.e. they are uniquely initialized in this object), and + // over-aligning global variables that have an explicit section is + // forbidden. + GlobalVariable *GV; + if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->hasUniqueInitializer() && + !GV->hasSection() && GV->getAlignment() < PrefAlign && + DL->getTypeAllocSize(GV->getType()->getElementType()) >= + MinSize + Offset2) + GV->setAlignment(PrefAlign); + } + // If this is a memcpy (or similar) then we may be able to improve the + // alignment + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { + unsigned Align = getKnownAlignment(MI->getDest(), *DL); + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) + Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL)); + if (Align > MI->getAlignment()) + MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align)); + } + } + + IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); + if (II) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::objectsize: { + // Lower all uses of llvm.objectsize.* + bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1); + Type *ReturnTy = CI->getType(); + Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL); + + // Substituting this can cause recursive simplifications, which can + // invalidate our iterator. Use a WeakVH to hold onto it in case this + // happens. + WeakVH IterHandle(&*CurInstIterator); + + replaceAndRecursivelySimplify(CI, RetVal, + TLInfo, nullptr); + + // If the iterator instruction was recursively deleted, start over at the + // start of the block. 
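+      // (WeakVH follows RAUW and nulls itself out if the tracked instruction
+      // is deleted, so the comparison below notices when CurInstIterator no
+      // longer refers to a live instruction.)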
+ if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + return true; + } + case Intrinsic::masked_load: { + // Scalarize unsupported vector masked load + if (!TTI->isLegalMaskedLoad(CI->getType())) { + ScalarizeMaskedLoad(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_store: { + if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) { + ScalarizeMaskedStore(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_gather: { + if (!TTI->isLegalMaskedGather(CI->getType())) { + ScalarizeMaskedGather(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::masked_scatter: { + if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) { + ScalarizeMaskedScatter(CI); + ModifiedDT = true; + return true; + } + return false; + } + case Intrinsic::aarch64_stlxr: + case Intrinsic::aarch64_stxr: { + ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); + if (!ExtVal || !ExtVal->hasOneUse() || + ExtVal->getParent() == CI->getParent()) + return false; + // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. + ExtVal->moveBefore(CI); + // Mark this instruction as "inserted by CGP", so that other + // optimizations don't touch it. + InsertedInsts.insert(ExtVal); + return true; + } + case Intrinsic::invariant_group_barrier: + II->replaceAllUsesWith(II->getArgOperand(0)); + II->eraseFromParent(); + return true; + + case Intrinsic::cttz: + case Intrinsic::ctlz: + // If counting zeros is expensive, try to avoid it. + return despeculateCountZeros(II, TLI, DL, ModifiedDT); + } + + if (TLI) { + // Unknown address space. + // TODO: Target hook to pick which address space the intrinsic cares + // about? + unsigned AddrSpace = ~0u; + SmallVector<Value*, 2> PtrOps; + Type *AccessTy; + if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace)) + while (!PtrOps.empty()) + if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace)) + return true; + } + } + + // From here on out we're working with named functions. + if (!CI->getCalledFunction()) return false; + + // Lower all default uses of _chk calls. This is very similar + // to what InstCombineCalls does, but here we are only lowering calls + // to fortified library functions (e.g. __memcpy_chk) that have the default + // "don't know" as the objectsize. Anything else should be left alone. + FortifiedLibCallSimplifier Simplifier(TLInfo, true); + if (Value *V = Simplifier.optimizeCall(CI)) { + CI->replaceAllUsesWith(V); + CI->eraseFromParent(); + return true; + } + return false; +} + +/// Look for opportunities to duplicate return instructions to the predecessor +/// to enable tail call optimizations. 
The case it is currently looking for is: +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// @endcode +/// +/// => +/// +/// @code +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// @endcode +bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) { + if (!TLI) + return false; + + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) + return false; + + PHINode *PN = nullptr; + BitCastInst *BCI = nullptr; + Value *V = RI->getReturnValue(); + if (V) { + BCI = dyn_cast<BitCastInst>(V); + if (BCI) + V = BCI->getOperand(0); + + PN = dyn_cast<PHINode>(V); + if (!PN) + return false; + } + + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + AttributeSet CallerAttrs = F->getAttributes(); + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa<DbgInfoIntrinsic>(BI)); + if (&*BI == BCI) + // Also skip over the bitcast. + ++BI; + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa<DbgInfoIntrinsic>(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector<CallInst*, 4> TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. + if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet<BasicBlock*, 4> VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI).second) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast<CallInst>(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CalleeAttrs = CS.getAttributes(); + if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex). 
+ removeAttribute(Attribute::NoAlias)) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. + if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + +//===----------------------------------------------------------------------===// +// Memory Optimization +//===----------------------------------------------------------------------===// + +namespace { + +/// This is an extended version of TargetLowering::AddrMode +/// which holds actual Value*'s for register values. +struct ExtAddrMode : public TargetLowering::AddrMode { + Value *BaseReg; + Value *ScaledReg; + ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {} + void print(raw_ostream &OS) const; + void dump() const; + + bool operator==(const ExtAddrMode& O) const { + return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) && + (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) && + (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale); + } +}; + +#ifndef NDEBUG +static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) { + AM.print(OS); + return OS; +} +#endif + +void ExtAddrMode::print(raw_ostream &OS) const { + bool NeedPlus = false; + OS << "["; + if (BaseGV) { + OS << (NeedPlus ? " + " : "") + << "GV:"; + BaseGV->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } + + if (BaseReg) { + OS << (NeedPlus ? " + " : "") + << "Base:"; + BaseReg->printAsOperand(OS, /*PrintType=*/false); + NeedPlus = true; + } + if (Scale) { + OS << (NeedPlus ? " + " : "") + << Scale << "*"; + ScaledReg->printAsOperand(OS, /*PrintType=*/false); + } + + OS << ']'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ExtAddrMode::dump() const { + print(dbgs()); + dbgs() << '\n'; +} +#endif + +/// \brief This class provides transaction based operation on the IR. +/// Every change made through this class is recorded in the internal state and +/// can be undone (rollback) until commit is called. +class TypePromotionTransaction { + + /// \brief This represents the common interface of the individual transaction. + /// Each class implements the logic for doing one specific modification on + /// the IR via the TypePromotionTransaction. + class TypePromotionAction { + protected: + /// The Instruction modified. + Instruction *Inst; + + public: + /// \brief Constructor of the action. + /// The constructor performs the related action on the IR. + TypePromotionAction(Instruction *Inst) : Inst(Inst) {} + + virtual ~TypePromotionAction() {} + + /// \brief Undo the modification done by this action. + /// When this method is called, the IR must be in the same state as it was + /// before this action was applied. + /// \pre Undoing the action works if and only if the IR is in the exact same + /// state as it was directly after this action was applied. + virtual void undo() = 0; + + /// \brief Advocate every change made by this action. 
+ /// When the results on the IR of the action are to be kept, it is important + /// to call this function, otherwise hidden information may be kept forever. + virtual void commit() { + // Nothing to be done, this action is not doing anything. + } + }; + + /// \brief Utility to remember the position of an instruction. + class InsertionHandler { + /// Position of an instruction. + /// Either an instruction: + /// - Is the first in a basic block: BB is used. + /// - Has a previous instructon: PrevInst is used. + union { + Instruction *PrevInst; + BasicBlock *BB; + } Point; + /// Remember whether or not the instruction had a previous instruction. + bool HasPrevInstruction; + + public: + /// \brief Record the position of \p Inst. + InsertionHandler(Instruction *Inst) { + BasicBlock::iterator It = Inst->getIterator(); + HasPrevInstruction = (It != (Inst->getParent()->begin())); + if (HasPrevInstruction) + Point.PrevInst = &*--It; + else + Point.BB = Inst->getParent(); + } + + /// \brief Insert \p Inst at the recorded position. + void insert(Instruction *Inst) { + if (HasPrevInstruction) { + if (Inst->getParent()) + Inst->removeFromParent(); + Inst->insertAfter(Point.PrevInst); + } else { + Instruction *Position = &*Point.BB->getFirstInsertionPt(); + if (Inst->getParent()) + Inst->moveBefore(Position); + else + Inst->insertBefore(Position); + } + } + }; + + /// \brief Move an instruction before another. + class InstructionMoveBefore : public TypePromotionAction { + /// Original position of the instruction. + InsertionHandler Position; + + public: + /// \brief Move \p Inst before \p Before. + InstructionMoveBefore(Instruction *Inst, Instruction *Before) + : TypePromotionAction(Inst), Position(Inst) { + DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n"); + Inst->moveBefore(Before); + } + + /// \brief Move the instruction back to its original position. + void undo() override { + DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n"); + Position.insert(Inst); + } + }; + + /// \brief Set the operand of an instruction with a new value. + class OperandSetter : public TypePromotionAction { + /// Original operand of the instruction. + Value *Origin; + /// Index of the modified instruction. + unsigned Idx; + + public: + /// \brief Set \p Idx operand of \p Inst with \p NewVal. + OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal) + : TypePromotionAction(Inst), Idx(Idx) { + DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n" + << "for:" << *Inst << "\n" + << "with:" << *NewVal << "\n"); + Origin = Inst->getOperand(Idx); + Inst->setOperand(Idx, NewVal); + } + + /// \brief Restore the original value of the instruction. + void undo() override { + DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n" + << "for: " << *Inst << "\n" + << "with: " << *Origin << "\n"); + Inst->setOperand(Idx, Origin); + } + }; + + /// \brief Hide the operands of an instruction. + /// Do as if this instruction was not using any of its operands. + class OperandsHider : public TypePromotionAction { + /// The list of original operands. + SmallVector<Value *, 4> OriginalValues; + + public: + /// \brief Remove \p Inst from the uses of the operands of \p Inst. + OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) { + DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n"); + unsigned NumOpnds = Inst->getNumOperands(); + OriginalValues.reserve(NumOpnds); + for (unsigned It = 0; It < NumOpnds; ++It) { + // Save the current operand. 
+ Value *Val = Inst->getOperand(It); + OriginalValues.push_back(Val); + // Set a dummy one. + // We could use OperandSetter here, but that would imply an overhead + // that we are not willing to pay. + Inst->setOperand(It, UndefValue::get(Val->getType())); + } + } + + /// \brief Restore the original list of uses. + void undo() override { + DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n"); + for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It) + Inst->setOperand(It, OriginalValues[It]); + } + }; + + /// \brief Build a truncate instruction. + class TruncBuilder : public TypePromotionAction { + Value *Val; + public: + /// \brief Build a truncate instruction of \p Opnd producing a \p Ty + /// result. + /// trunc Opnd to Ty. + TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) { + IRBuilder<> Builder(Opnd); + Val = Builder.CreateTrunc(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n"); + } + + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); + } + }; + + /// \brief Build a sign extension instruction. + class SExtBuilder : public TypePromotionAction { + Value *Val; + public: + /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty + /// result. + /// sext Opnd to Ty. + SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(InsertPt) { + IRBuilder<> Builder(InsertPt); + Val = Builder.CreateSExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n"); + } + + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); + } + }; + + /// \brief Build a zero extension instruction. + class ZExtBuilder : public TypePromotionAction { + Value *Val; + public: + /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty + /// result. + /// zext Opnd to Ty. + ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty) + : TypePromotionAction(InsertPt) { + IRBuilder<> Builder(InsertPt); + Val = Builder.CreateZExt(Opnd, Ty, "promoted"); + DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n"); + } + + /// \brief Get the built value. + Value *getBuiltValue() { return Val; } + + /// \brief Remove the built instruction. + void undo() override { + DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"); + if (Instruction *IVal = dyn_cast<Instruction>(Val)) + IVal->eraseFromParent(); + } + }; + + /// \brief Mutate an instruction to another type. + class TypeMutator : public TypePromotionAction { + /// Record the original type. + Type *OrigTy; + + public: + /// \brief Mutate the type of \p Inst into \p NewTy. + TypeMutator(Instruction *Inst, Type *NewTy) + : TypePromotionAction(Inst), OrigTy(Inst->getType()) { + DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy + << "\n"); + Inst->mutateType(NewTy); + } + + /// \brief Mutate the instruction back to its original type. + void undo() override { + DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy + << "\n"); + Inst->mutateType(OrigTy); + } + }; + + /// \brief Replace the uses of an instruction by another instruction. 
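+  /// For example, replacing every use of %a with %b records each
+  /// (user, operand index) pair that referenced %a, so that undo() can point
+  /// those operands back at %a if the transaction is rolled back.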
+ class UsesReplacer : public TypePromotionAction { + /// Helper structure to keep track of the replaced uses. + struct InstructionAndIdx { + /// The instruction using the instruction. + Instruction *Inst; + /// The index where this instruction is used for Inst. + unsigned Idx; + InstructionAndIdx(Instruction *Inst, unsigned Idx) + : Inst(Inst), Idx(Idx) {} + }; + + /// Keep track of the original uses (pair Instruction, Index). + SmallVector<InstructionAndIdx, 4> OriginalUses; + typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator; + + public: + /// \brief Replace all the use of \p Inst by \p New. + UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) { + DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New + << "\n"); + // Record the original uses. + for (Use &U : Inst->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo())); + } + // Now, we can replace the uses. + Inst->replaceAllUsesWith(New); + } + + /// \brief Reassign the original uses of Inst to Inst. + void undo() override { + DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"); + for (use_iterator UseIt = OriginalUses.begin(), + EndIt = OriginalUses.end(); + UseIt != EndIt; ++UseIt) { + UseIt->Inst->setOperand(UseIt->Idx, Inst); + } + } + }; + + /// \brief Remove an instruction from the IR. + class InstructionRemover : public TypePromotionAction { + /// Original position of the instruction. + InsertionHandler Inserter; + /// Helper structure to hide all the link to the instruction. In other + /// words, this helps to do as if the instruction was removed. + OperandsHider Hider; + /// Keep track of the uses replaced, if any. + UsesReplacer *Replacer; + + public: + /// \brief Remove all reference of \p Inst and optinally replace all its + /// uses with New. + /// \pre If !Inst->use_empty(), then New != nullptr + InstructionRemover(Instruction *Inst, Value *New = nullptr) + : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst), + Replacer(nullptr) { + if (New) + Replacer = new UsesReplacer(Inst, New); + DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n"); + Inst->removeFromParent(); + } + + ~InstructionRemover() override { delete Replacer; } + + /// \brief Really remove the instruction. + void commit() override { delete Inst; } + + /// \brief Resurrect the instruction and reassign it to the proper uses if + /// new value was provided when build this action. + void undo() override { + DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"); + Inserter.insert(Inst); + if (Replacer) + Replacer->undo(); + Hider.undo(); + } + }; + +public: + /// Restoration point. + /// The restoration point is a pointer to an action instead of an iterator + /// because the iterator may be invalidated but not the pointer. + typedef const TypePromotionAction *ConstRestorationPt; + /// Advocate every changes made in that transaction. + void commit(); + /// Undo all the changes made after the given point. + void rollback(ConstRestorationPt Point); + /// Get the current restoration point. + ConstRestorationPt getRestorationPoint() const; + + /// \name API for IR modification with state keeping to support rollback. + /// @{ + /// Same as Instruction::setOperand. + void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal); + /// Same as Instruction::eraseFromParent. + void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr); + /// Same as Value::replaceAllUsesWith. 
+ void replaceAllUsesWith(Instruction *Inst, Value *New); + /// Same as Value::mutateType. + void mutateType(Instruction *Inst, Type *NewTy); + /// Same as IRBuilder::createTrunc. + Value *createTrunc(Instruction *Opnd, Type *Ty); + /// Same as IRBuilder::createSExt. + Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as IRBuilder::createZExt. + Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty); + /// Same as Instruction::moveBefore. + void moveBefore(Instruction *Inst, Instruction *Before); + /// @} + +private: + /// The ordered list of actions made so far. + SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions; + typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt; +}; + +void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, + Value *NewVal) { + Actions.push_back( + make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal)); +} + +void TypePromotionTransaction::eraseInstruction(Instruction *Inst, + Value *NewVal) { + Actions.push_back( + make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal)); +} + +void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, + Value *New) { + Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); +} + +void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { + Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); +} + +Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, + Type *Ty) { + std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); + Value *Val = Ptr->getBuiltValue(); + Actions.push_back(std::move(Ptr)); + return Val; +} + +Value *TypePromotionTransaction::createSExt(Instruction *Inst, + Value *Opnd, Type *Ty) { + std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); + Value *Val = Ptr->getBuiltValue(); + Actions.push_back(std::move(Ptr)); + return Val; +} + +Value *TypePromotionTransaction::createZExt(Instruction *Inst, + Value *Opnd, Type *Ty) { + std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); + Value *Val = Ptr->getBuiltValue(); + Actions.push_back(std::move(Ptr)); + return Val; +} + +void TypePromotionTransaction::moveBefore(Instruction *Inst, + Instruction *Before) { + Actions.push_back( + make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before)); +} + +TypePromotionTransaction::ConstRestorationPt +TypePromotionTransaction::getRestorationPoint() const { + return !Actions.empty() ? Actions.back().get() : nullptr; +} + +void TypePromotionTransaction::commit() { + for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; + ++It) + (*It)->commit(); + Actions.clear(); +} + +void TypePromotionTransaction::rollback( + TypePromotionTransaction::ConstRestorationPt Point) { + while (!Actions.empty() && Point != Actions.back().get()) { + std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); + Curr->undo(); + } +} + +/// \brief A helper class for matching addressing modes. +/// +/// This encapsulates the logic for matching the target-legal addressing modes. +class AddressingModeMatcher { + SmallVectorImpl<Instruction*> &AddrModeInsts; + const TargetMachine &TM; + const TargetLowering &TLI; + const DataLayout &DL; + + /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and + /// the memory instruction that we're computing this address for. 
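+  /// For instance, when matching the address of "store i64 %v, i64* %p",
+  /// AccessTy would be i64 and MemoryInst the store itself.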
+ Type *AccessTy; + unsigned AddrSpace; + Instruction *MemoryInst; + + /// This is the addressing mode that we're building up. This is + /// part of the return value of this addressing mode matching stuff. + ExtAddrMode &AddrMode; + + /// The instructions inserted by other CodeGenPrepare optimizations. + const SetOfInstrs &InsertedInsts; + /// A map from the instructions to their type before promotion. + InstrToOrigTy &PromotedInsts; + /// The ongoing transaction where every action should be registered. + TypePromotionTransaction &TPT; + + /// This is set to true when we should not do profitability checks. + /// When true, IsProfitableToFoldIntoAddressingMode always returns true. + bool IgnoreProfitability; + + AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI, + const TargetMachine &TM, Type *AT, unsigned AS, + Instruction *MI, ExtAddrMode &AM, + const SetOfInstrs &InsertedInsts, + InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT) + : AddrModeInsts(AMI), TM(TM), + TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent()) + ->getTargetLowering()), + DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS), + MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), + PromotedInsts(PromotedInsts), TPT(TPT) { + IgnoreProfitability = false; + } +public: + + /// Find the maximal addressing mode that a load/store of V can fold, + /// give an access type of AccessTy. This returns a list of involved + /// instructions in AddrModeInsts. + /// \p InsertedInsts The instructions inserted by other CodeGenPrepare + /// optimizations. + /// \p PromotedInsts maps the instructions to their type before promotion. + /// \p The ongoing transaction where every action should be registered. + static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS, + Instruction *MemoryInst, + SmallVectorImpl<Instruction*> &AddrModeInsts, + const TargetMachine &TM, + const SetOfInstrs &InsertedInsts, + InstrToOrigTy &PromotedInsts, + TypePromotionTransaction &TPT) { + ExtAddrMode Result; + + bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS, + MemoryInst, Result, InsertedInsts, + PromotedInsts, TPT).matchAddr(V, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + return Result; + } +private: + bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth); + bool matchAddr(Value *V, unsigned Depth); + bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth, + bool *MovedAway = nullptr); + bool isProfitableToFoldIntoAddressingMode(Instruction *I, + ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter); + bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2); + bool isPromotionProfitable(unsigned NewCost, unsigned OldCost, + Value *PromotedOperand) const; +}; + +/// Try adding ScaleReg*Scale to the current addressing mode. +/// Return true and update AddrMode if this addr mode is legal for the target, +/// false if not. +bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, + unsigned Depth) { + // If Scale is 1, then this is the same as adding ScaleReg to the addressing + // mode. Just process that directly. + if (Scale == 1) + return matchAddr(ScaleReg, Depth); + + // If the scale is 0, it takes nothing to add this. + if (Scale == 0) + return true; + + // If we already have a scale of this value, we can add to it, otherwise, we + // need an available scale field. 
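+  // For example, if the current mode is [%base + 4*%idx] and we now see
+  // 2*%idx, the scales can be combined into [%base + 6*%idx]; a second,
+  // distinct scaled register cannot be represented, so we give up below.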
+ if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) + return false; + + ExtAddrMode TestAddrMode = AddrMode; + + // Add scale to turn X*4+X*3 -> X*7. This could also do things like + // [A+B + A*7] -> [B+A*8]. + TestAddrMode.Scale += Scale; + TestAddrMode.ScaledReg = ScaleReg; + + // If the new address isn't legal, bail out. + if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) + return false; + + // It was legal, so commit it. + AddrMode = TestAddrMode; + + // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now + // to see if ScaleReg is actually X+C. If so, we can turn this into adding + // X*Scale + C*Scale to addr mode. + ConstantInt *CI = nullptr; Value *AddLHS = nullptr; + if (isa<Instruction>(ScaleReg) && // not a constant expr. + match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { + TestAddrMode.ScaledReg = AddLHS; + TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; + + // If this addressing mode is legal, commit it and remember that we folded + // this instruction. + if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { + AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); + AddrMode = TestAddrMode; + return true; + } + } + + // Otherwise, not (x+c)*scale, just return what we have. + return true; +} + +/// This is a little filter, which returns true if an addressing computation +/// involving I might be folded into a load/store accessing it. +/// This doesn't need to be perfect, but needs to accept at least +/// the set of instructions that MatchOperationAddr can. +static bool MightBeFoldableInst(Instruction *I) { + switch (I->getOpcode()) { + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + // Don't touch identity bitcasts. + if (I->getType() == I->getOperand(0)->getType()) + return false; + return I->getType()->isPointerTy() || I->getType()->isIntegerTy(); + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return true; + case Instruction::IntToPtr: + // We know the input is intptr_t, so this is foldable. + return true; + case Instruction::Add: + return true; + case Instruction::Mul: + case Instruction::Shl: + // Can only handle X*C and X << C. + return isa<ConstantInt>(I->getOperand(1)); + case Instruction::GetElementPtr: + return true; + default: + return false; + } +} + +/// \brief Check whether or not \p Val is a legal instruction for \p TLI. +/// \note \p Val is assumed to be the product of some type promotion. +/// Therefore if \p Val has an undefined state in \p TLI, this is assumed +/// to be legal, as the non-promoted value would have had the same state. +static bool isPromotedInstructionLegal(const TargetLowering &TLI, + const DataLayout &DL, Value *Val) { + Instruction *PromotedInst = dyn_cast<Instruction>(Val); + if (!PromotedInst) + return false; + int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); + // If the ISDOpcode is undefined, it was undefined before the promotion. + if (!ISDOpcode) + return true; + // Otherwise, check if the promoted instruction is legal or not. + return TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); +} + +/// \brief Hepler class to perform type promotion. +class TypePromotionHelper { + /// \brief Utility function to check whether or not a sign or zero extension + /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by + /// either using the operands of \p Inst or promoting \p Inst. 
+ /// The type of the extension is defined by \p IsSExt. + /// In other words, check if: + /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType. + /// #1 Promotion applies: + /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...). + /// #2 Operand reuses: + /// ext opnd1 to ConsideredExtType. + /// \p PromotedInsts maps the instructions to their type before promotion. + static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType, + const InstrToOrigTy &PromotedInsts, bool IsSExt); + + /// \brief Utility function to determine if \p OpIdx should be promoted when + /// promoting \p Inst. + static bool shouldExtOperand(const Instruction *Inst, int OpIdx) { + return !(isa<SelectInst>(Inst) && OpIdx == 0); + } + + /// \brief Utility function to promote the operand of \p Ext when this + /// operand is a promotable trunc or sext or zext. + /// \p PromotedInsts maps the instructions to their type before promotion. + /// \p CreatedInstsCost[out] contains the cost of all instructions + /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. + /// Should never be called directly. + /// \return The promoted value which is used instead of Ext. + static Value *promoteOperandForTruncAndAnyExt( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI); + + /// \brief Utility function to promote the operand of \p Ext when this + /// operand is promotable and is not a supported trunc or sext. + /// \p PromotedInsts maps the instructions to their type before promotion. + /// \p CreatedInstsCost[out] contains the cost of all the instructions + /// created to promote the operand of Ext. + /// Newly added extensions are inserted in \p Exts. + /// Newly added truncates are inserted in \p Truncs. + /// Should never be called directly. + /// \return The promoted value which is used instead of Ext. + static Value *promoteOperandForOther(Instruction *Ext, + TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI, bool IsSExt); + + /// \see promoteOperandForOther. + static Value *signExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, true); + } + + /// \see promoteOperandForOther. + static Value *zeroExtendOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost, + Exts, Truncs, TLI, false); + } + +public: + /// Type for the utility function that promotes the operand of Ext. 
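+  /// getAction() below returns one of the static promotion helpers declared
+  /// above (or null when the operand of Ext cannot be promoted).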
+ typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, + unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, + const TargetLowering &TLI); + /// \brief Given a sign/zero extend instruction \p Ext, return the approriate + /// action to promote the operand of \p Ext instead of using Ext. + /// \return NULL if no promotable action is possible with the current + /// sign extension. + /// \p InsertedInsts keeps track of all the instructions inserted by the + /// other CodeGenPrepare optimizations. This information is important + /// because we do not want to promote these instructions as CodeGenPrepare + /// will reinsert them later. Thus creating an infinite loop: create/remove. + /// \p PromotedInsts maps the instructions to their type before promotion. + static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts, + const TargetLowering &TLI, + const InstrToOrigTy &PromotedInsts); +}; + +bool TypePromotionHelper::canGetThrough(const Instruction *Inst, + Type *ConsideredExtType, + const InstrToOrigTy &PromotedInsts, + bool IsSExt) { + // The promotion helper does not know how to deal with vector types yet. + // To be able to fix that, we would need to fix the places where we + // statically extend, e.g., constants and such. + if (Inst->getType()->isVectorTy()) + return false; + + // We can always get through zext. + if (isa<ZExtInst>(Inst)) + return true; + + // sext(sext) is ok too. + if (IsSExt && isa<SExtInst>(Inst)) + return true; + + // We can get through binary operator, if it is legal. In other words, the + // binary operator must have a nuw or nsw flag. + const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); + if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && + ((!IsSExt && BinOp->hasNoUnsignedWrap()) || + (IsSExt && BinOp->hasNoSignedWrap()))) + return true; + + // Check if we can do the following simplification. + // ext(trunc(opnd)) --> ext(opnd) + if (!isa<TruncInst>(Inst)) + return false; + + Value *OpndVal = Inst->getOperand(0); + // Check if we can use this operand in the extension. + // If the type is larger than the result type of the extension, we cannot. + if (!OpndVal->getType()->isIntegerTy() || + OpndVal->getType()->getIntegerBitWidth() > + ConsideredExtType->getIntegerBitWidth()) + return false; + + // If the operand of the truncate is not an instruction, we will not have + // any information on the dropped bits. + // (Actually we could for constant but it is not worth the extra logic). + Instruction *Opnd = dyn_cast<Instruction>(OpndVal); + if (!Opnd) + return false; + + // Check if the source of the type is narrow enough. + // I.e., check that trunc just drops extended bits of the same kind of + // the extension. + // #1 get the type of the operand and check the kind of the extended bits. + const Type *OpndType; + InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd); + if (It != PromotedInsts.end() && It->second.getInt() == IsSExt) + OpndType = It->second.getPointer(); + else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) + OpndType = Opnd->getOperand(0)->getType(); + else + return false; + + // #2 check that the truncate just drops extended bits. 
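+  // For example (with IsSExt), given:
+  //   %s = sext i8 %x to i32
+  //   %t = trunc i32 %s to i16
+  // OpndType is i8, and the i32 -> i16 truncate only drops bits produced by
+  // the earlier sext, so the width check below succeeds.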
+ return Inst->getType()->getIntegerBitWidth() >= + OpndType->getIntegerBitWidth(); +} + +TypePromotionHelper::Action TypePromotionHelper::getAction( + Instruction *Ext, const SetOfInstrs &InsertedInsts, + const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { + assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && + "Unexpected instruction type"); + Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); + Type *ExtTy = Ext->getType(); + bool IsSExt = isa<SExtInst>(Ext); + // If the operand of the extension is not an instruction, we cannot + // get through. + // If it, check we can get through. + if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) + return nullptr; + + // Do not promote if the operand has been added by codegenprepare. + // Otherwise, it means we are undoing an optimization that is likely to be + // redone, thus causing potential infinite loop. + if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd)) + return nullptr; + + // SExt or Trunc instructions. + // Return the related handler. + if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || + isa<ZExtInst>(ExtOpnd)) + return promoteOperandForTruncAndAnyExt; + + // Regular instruction. + // Abort early if we will have to insert non-free instructions. + if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) + return nullptr; + return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; +} + +Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( + llvm::Instruction *SExt, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { + // By construction, the operand of SExt is an instruction. Otherwise we cannot + // get through it and this method should not be called. + Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); + Value *ExtVal = SExt; + bool HasMergedNonFreeExt = false; + if (isa<ZExtInst>(SExtOpnd)) { + // Replace s|zext(zext(opnd)) + // => zext(opnd). + HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); + Value *ZExt = + TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); + TPT.replaceAllUsesWith(SExt, ZExt); + TPT.eraseInstruction(SExt); + ExtVal = ZExt; + } else { + // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) + // => z|sext(opnd). + TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); + } + CreatedInstsCost = 0; + + // Remove dead code. + if (SExtOpnd->use_empty()) + TPT.eraseInstruction(SExtOpnd); + + // Check if the extension is still needed. + Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); + if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { + if (ExtInst) { + if (Exts) + Exts->push_back(ExtInst); + CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; + } + return ExtVal; + } + + // At this point we have: ext ty opnd to ty. + // Reassign the uses of ExtInst to the opnd and remove ExtInst. + Value *NextVal = ExtInst->getOperand(0); + TPT.eraseInstruction(ExtInst, NextVal); + return NextVal; +} + +Value *TypePromotionHelper::promoteOperandForOther( + Instruction *Ext, TypePromotionTransaction &TPT, + InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, + SmallVectorImpl<Instruction *> *Exts, + SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, + bool IsSExt) { + // By construction, the operand of Ext is an instruction. Otherwise we cannot + // get through it and this method should not be called. 
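+  // A sketch of the rewrite performed below, assuming a sign extension of an
+  // add that carries an nsw flag and has a constant operand:
+  //   %add = add nsw i32 %opnd, 4
+  //   %ext = sext i32 %add to i64
+  // becomes
+  //   %promoted = sext i32 %opnd to i64
+  //   %add = add nsw i64 %promoted, 4
+  // (with a trunc back to i32 inserted after %add if it had other users), all
+  // of it recorded in the transaction so it can be rolled back.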
+ Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); + CreatedInstsCost = 0; + if (!ExtOpnd->hasOneUse()) { + // ExtOpnd will be promoted. + // All its uses, but Ext, will need to use a truncated value of the + // promoted version. + // Create the truncate now. + Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); + if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { + ITrunc->removeFromParent(); + // Insert it just after the definition. + ITrunc->insertAfter(ExtOpnd); + if (Truncs) + Truncs->push_back(ITrunc); + } + + TPT.replaceAllUsesWith(ExtOpnd, Trunc); + // Restore the operand of Ext (which has been replaced by the previous call + // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. + TPT.setOperand(Ext, 0, ExtOpnd); + } + + // Get through the Instruction: + // 1. Update its type. + // 2. Replace the uses of Ext by Inst. + // 3. Extend each operand that needs to be extended. + + // Remember the original type of the instruction before promotion. + // This is useful to know that the high bits are sign extended bits. + PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>( + ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt))); + // Step #1. + TPT.mutateType(ExtOpnd, Ext->getType()); + // Step #2. + TPT.replaceAllUsesWith(Ext, ExtOpnd); + // Step #3. + Instruction *ExtForOpnd = Ext; + + DEBUG(dbgs() << "Propagate Ext to operands\n"); + for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; + ++OpIdx) { + DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n'); + if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || + !shouldExtOperand(ExtOpnd, OpIdx)) { + DEBUG(dbgs() << "No need to propagate\n"); + continue; + } + // Check if we can statically extend the operand. + Value *Opnd = ExtOpnd->getOperand(OpIdx); + if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { + DEBUG(dbgs() << "Statically extend\n"); + unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); + APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) + : Cst->getValue().zext(BitWidth); + TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); + continue; + } + // UndefValue are typed, so we have to statically sign extend them. + if (isa<UndefValue>(Opnd)) { + DEBUG(dbgs() << "Statically extend\n"); + TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); + continue; + } + + // Otherwise we have to explicity sign extend the operand. + // Check if Ext was reused to extend an operand. + if (!ExtForOpnd) { + // If yes, create a new one. + DEBUG(dbgs() << "More operands to ext\n"); + Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) + : TPT.createZExt(Ext, Opnd, Ext->getType()); + if (!isa<Instruction>(ValForExtOpnd)) { + TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); + continue; + } + ExtForOpnd = cast<Instruction>(ValForExtOpnd); + } + if (Exts) + Exts->push_back(ExtForOpnd); + TPT.setOperand(ExtForOpnd, 0, Opnd); + + // Move the sign extension before the insertion point. + TPT.moveBefore(ExtForOpnd, ExtOpnd); + TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); + CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); + // If more sext are required, new instructions will have to be created. + ExtForOpnd = nullptr; + } + if (ExtForOpnd == Ext) { + DEBUG(dbgs() << "Extension is useless now\n"); + TPT.eraseInstruction(Ext); + } + return ExtOpnd; +} + +/// Check whether or not promoting an instruction to a wider type is profitable. 
+/// \p NewCost gives the cost of extension instructions created by the +/// promotion. +/// \p OldCost gives the cost of extension instructions before the promotion +/// plus the number of instructions that have been +/// matched in the addressing mode the promotion. +/// \p PromotedOperand is the value that has been promoted. +/// \return True if the promotion is profitable, false otherwise. +bool AddressingModeMatcher::isPromotionProfitable( + unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { + DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'); + // The cost of the new extensions is greater than the cost of the + // old extension plus what we folded. + // This is not profitable. + if (NewCost > OldCost) + return false; + if (NewCost < OldCost) + return true; + // The promotion is neutral but it may help folding the sign extension in + // loads for instance. + // Check that we did not create an illegal instruction. + return isPromotedInstructionLegal(TLI, DL, PromotedOperand); +} + +/// Given an instruction or constant expr, see if we can fold the operation +/// into the addressing mode. If so, update the addressing mode and return +/// true, otherwise return false without modifying AddrMode. +/// If \p MovedAway is not NULL, it contains the information of whether or +/// not AddrInst has to be folded into the addressing mode on success. +/// If \p MovedAway == true, \p AddrInst will not be part of the addressing +/// because it has been moved away. +/// Thus AddrInst must not be added in the matched instructions. +/// This state can happen when AddrInst is a sext, since it may be moved away. +/// Therefore, AddrInst may not be valid when MovedAway is true and it must +/// not be referenced anymore. +bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode, + unsigned Depth, + bool *MovedAway) { + // Avoid exponential behavior on extremely deep expression trees. + if (Depth >= 5) return false; + + // By default, all matched instructions stay in place. + if (MovedAway) + *MovedAway = false; + + switch (Opcode) { + case Instruction::PtrToInt: + // PtrToInt is always a noop, as we know that the int type is pointer sized. + return matchAddr(AddrInst->getOperand(0), Depth); + case Instruction::IntToPtr: { + auto AS = AddrInst->getType()->getPointerAddressSpace(); + auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); + // This inttoptr is a no-op if the integer type is pointer sized. + if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy) + return matchAddr(AddrInst->getOperand(0), Depth); + return false; + } + case Instruction::BitCast: + // BitCast is always a noop, and we can handle it as long as it is + // int->int or pointer->pointer (we don't want int<->fp or something). + if ((AddrInst->getOperand(0)->getType()->isPointerTy() || + AddrInst->getOperand(0)->getType()->isIntegerTy()) && + // Don't touch identity bitcasts. These were probably put here by LSR, + // and we don't want to mess around with them. Assume it knows what it + // is doing. 
+ AddrInst->getOperand(0)->getType() != AddrInst->getType()) + return matchAddr(AddrInst->getOperand(0), Depth); + return false; + case Instruction::AddrSpaceCast: { + unsigned SrcAS + = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); + if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + return matchAddr(AddrInst->getOperand(0), Depth); + return false; + } + case Instruction::Add: { + // Check to see if we can merge in the RHS then the LHS. If so, we win. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + // Start a transaction at this point. + // The LHS may match but not the RHS. + // Therefore, we need a higher level restoration point to undo partially + // matched operation. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + + if (matchAddr(AddrInst->getOperand(1), Depth+1) && + matchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + + // Restore the old addr mode info. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + + // Otherwise this was over-aggressive. Try merging in the LHS then the RHS. + if (matchAddr(AddrInst->getOperand(0), Depth+1) && + matchAddr(AddrInst->getOperand(1), Depth+1)) + return true; + + // Otherwise we definitely can't merge the ADD in. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + break; + } + //case Instruction::Or: + // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD. + //break; + case Instruction::Mul: + case Instruction::Shl: { + // Can only handle X*C and X << C. + ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1)); + if (!RHS) + return false; + int64_t Scale = RHS->getSExtValue(); + if (Opcode == Instruction::Shl) + Scale = 1LL << Scale; + + return matchScaledValue(AddrInst->getOperand(0), Scale, Depth); + } + case Instruction::GetElementPtr: { + // Scan the GEP. We check it if it contains constant offsets and at most + // one variable offset. + int VariableOperand = -1; + unsigned VariableScale = 0; + + int64_t ConstantOffset = 0; + gep_type_iterator GTI = gep_type_begin(AddrInst); + for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) { + if (StructType *STy = dyn_cast<StructType>(*GTI)) { + const StructLayout *SL = DL.getStructLayout(STy); + unsigned Idx = + cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue(); + ConstantOffset += SL->getElementOffset(Idx); + } else { + uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType()); + if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) { + ConstantOffset += CI->getSExtValue()*TypeSize; + } else if (TypeSize) { // Scales of zero don't do anything. + // We only allow one variable index at the moment. + if (VariableOperand != -1) + return false; + + // Remember the variable index. + VariableOperand = i; + VariableScale = TypeSize; + } + } + } + + // A common case is for the GEP to only do a constant offset. In this case, + // just add it to the disp field and check validity. + if (VariableOperand == -1) { + AddrMode.BaseOffs += ConstantOffset; + if (ConstantOffset == 0 || + TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) { + // Check to see if we can fold the base pointer in too. 
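+      // For example (a sketch): for "getelementptr i32, i32* %p, i64 3" the
+      // loop above accumulated ConstantOffset = 12, and here we additionally
+      // try to fold %p itself into the addressing mode.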
+ if (matchAddr(AddrInst->getOperand(0), Depth+1)) + return true; + } + AddrMode.BaseOffs -= ConstantOffset; + return false; + } + + // Save the valid addressing mode in case we can't match. + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // See if the scale and offset amount is valid for this target. + AddrMode.BaseOffs += ConstantOffset; + + // Match the base operand of the GEP. + if (!matchAddr(AddrInst->getOperand(0), Depth+1)) { + // If it couldn't be matched, just stuff the value in a register. + if (AddrMode.HasBaseReg) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + } + + // Match the remaining variable portion of the GEP. + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale, + Depth)) { + // If it couldn't be matched, try stuffing the base into a register + // instead of matching it, and retrying the match of the scale. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + if (AddrMode.HasBaseReg) + return false; + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = AddrInst->getOperand(0); + AddrMode.BaseOffs += ConstantOffset; + if (!matchScaledValue(AddrInst->getOperand(VariableOperand), + VariableScale, Depth)) { + // If even that didn't work, bail. + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + return false; + } + } + + return true; + } + case Instruction::SExt: + case Instruction::ZExt: { + Instruction *Ext = dyn_cast<Instruction>(AddrInst); + if (!Ext) + return false; + + // Try to move this ext out of the way of the addressing mode. + // Ask for a method for doing so. + TypePromotionHelper::Action TPH = + TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts); + if (!TPH) + return false; + + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + unsigned CreatedInstsCost = 0; + unsigned ExtCost = !TLI.isExtFree(Ext); + Value *PromotedOperand = + TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI); + // SExt has been moved away. + // Thus either it will be rematched later in the recursive calls or it is + // gone. Anyway, we must not fold it into the addressing mode at this point. + // E.g., + // op = add opnd, 1 + // idx = ext op + // addr = gep base, idx + // is now: + // promotedOpnd = ext opnd <- no match here + // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls) + // addr = gep base, op <- match + if (MovedAway) + *MovedAway = true; + + assert(PromotedOperand && + "TypePromotionHelper should have filtered out those cases"); + + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + if (!matchAddr(PromotedOperand, Depth) || + // The total of the new cost is equal to the cost of the created + // instructions. + // The total of the old cost is equal to the cost of the extension plus + // what we have saved in the addressing mode. + !isPromotionProfitable(CreatedInstsCost, + ExtCost + (AddrModeInsts.size() - OldSize), + PromotedOperand)) { + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + DEBUG(dbgs() << "Sign extension does not pay off: rollback\n"); + TPT.rollback(LastKnownGood); + return false; + } + return true; + } + } + return false; +} + +/// If we can, try to add the value of 'Addr' into the current addressing mode. +/// If Addr can't be added to AddrMode this returns false and leaves AddrMode +/// unmodified. 
This assumes that Addr is either a pointer type or intptr_t +/// for the target. +/// +bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) { + // Start a transaction at this point that we will rollback if the matching + // fails. + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) { + // Fold in immediates if legal for the target. + AddrMode.BaseOffs += CI->getSExtValue(); + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) + return true; + AddrMode.BaseOffs -= CI->getSExtValue(); + } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) { + // If this is a global variable, try to fold it into the addressing mode. + if (!AddrMode.BaseGV) { + AddrMode.BaseGV = GV; + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) + return true; + AddrMode.BaseGV = nullptr; + } + } else if (Instruction *I = dyn_cast<Instruction>(Addr)) { + ExtAddrMode BackupAddrMode = AddrMode; + unsigned OldSize = AddrModeInsts.size(); + + // Check to see if it is possible to fold this operation. + bool MovedAway = false; + if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) { + // This instruction may have been moved away. If so, there is nothing + // to check here. + if (MovedAway) + return true; + // Okay, it's possible to fold this. Check to see if it is actually + // *profitable* to do so. We use a simple cost model to avoid increasing + // register pressure too much. + if (I->hasOneUse() || + isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) { + AddrModeInsts.push_back(I); + return true; + } + + // It isn't profitable to do this, roll back. + //cerr << "NOT FOLDING: " << *I; + AddrMode = BackupAddrMode; + AddrModeInsts.resize(OldSize); + TPT.rollback(LastKnownGood); + } + } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) { + if (matchOperationAddr(CE, CE->getOpcode(), Depth)) + return true; + TPT.rollback(LastKnownGood); + } else if (isa<ConstantPointerNull>(Addr)) { + // Null pointer gets folded without affecting the addressing mode. + return true; + } + + // Worse case, the target should support [reg] addressing modes. :) + if (!AddrMode.HasBaseReg) { + AddrMode.HasBaseReg = true; + AddrMode.BaseReg = Addr; + // Still check for legality in case the target supports [imm] but not [i+r]. + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) + return true; + AddrMode.HasBaseReg = false; + AddrMode.BaseReg = nullptr; + } + + // If the base register is already taken, see if we can do [r+r]. + if (AddrMode.Scale == 0) { + AddrMode.Scale = 1; + AddrMode.ScaledReg = Addr; + if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) + return true; + AddrMode.Scale = 0; + AddrMode.ScaledReg = nullptr; + } + // Couldn't match. + TPT.rollback(LastKnownGood); + return false; +} + +/// Check to see if all uses of OpVal by the specified inline asm call are due +/// to memory operands. If so, return true, otherwise return false. 
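+/// E.g. (illustrative): a pointer that only feeds indirect memory ("m")
+/// operands of the asm can keep its address computation folded, while the same
+/// value also used through an "r" operand cannot.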
+static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, + const TargetMachine &TM) { + const Function *F = CI->getParent()->getParent(); + const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering(); + const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI, + ImmutableCallSite(CI)); + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI->ComputeConstraintToUse(OpInfo, SDValue()); + + // If this asm operand is our Value*, and if it isn't an indirect memory + // operand, we can't fold it! + if (OpInfo.CallOperandVal == OpVal && + (OpInfo.ConstraintType != TargetLowering::C_Memory || + !OpInfo.isIndirect)) + return false; + } + + return true; +} + +/// Recursively walk all the uses of I until we find a memory use. +/// If we find an obviously non-foldable instruction, return true. +/// Add the ultimately found memory instructions to MemoryUses. +static bool FindAllMemoryUses( + Instruction *I, + SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, + SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) { + // If we already considered this instruction, we're done. + if (!ConsideredInsts.insert(I).second) + return false; + + // If this is an obviously unfoldable instruction, bail out. + if (!MightBeFoldableInst(I)) + return true; + + // Loop over all the uses, recursively processing them. + for (Use &U : I->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + + if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { + MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); + continue; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { + unsigned opNo = U.getOperandNo(); + if (opNo == 0) return true; // Storing addr, not into addr. + MemoryUses.push_back(std::make_pair(SI, opNo)); + continue; + } + + if (CallInst *CI = dyn_cast<CallInst>(UserI)) { + InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); + if (!IA) return true; + + // If this is a memory operand, we're cool, otherwise bail out. + if (!IsOperandAMemoryOperand(CI, IA, I, TM)) + return true; + continue; + } + + if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM)) + return true; + } + + return false; +} + +/// Return true if Val is already known to be live at the use site that we're +/// folding it into. If so, there is no cost to include it in the addressing +/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the +/// instruction already. +bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, + Value *KnownLive2) { + // If Val is either of the known-live values, we know it is live! + if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) + return true; + + // All values other than instructions and arguments (e.g. constants) are live. + if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; + + // If Val is a constant sized alloca in the entry block, it is live, this is + // true because it is just a reference to the stack/frame pointer, which is + // live for the whole function. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) + if (AI->isStaticAlloca()) + return true; + + // Check to see if this value is already used in the memory instruction's + // block. 
If so, it's already live into the block at the very least, so we + // can reasonably fold it. + return Val->isUsedInBasicBlock(MemoryInst->getParent()); +} + +/// It is possible for the addressing mode of the machine to fold the specified +/// instruction into a load or store that ultimately uses it. +/// However, the specified instruction has multiple uses. +/// Given this, it may actually increase register pressure to fold it +/// into the load. For example, consider this code: +/// +/// X = ... +/// Y = X+1 +/// use(Y) -> nonload/store +/// Z = Y+1 +/// load Z +/// +/// In this case, Y has multiple uses, and can be folded into the load of Z +/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to +/// be live at the use(Y) line. If we don't fold Y into load Z, we use one +/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the +/// number of computations either. +/// +/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If +/// X was live across 'load Z' for other reasons, we actually *would* want to +/// fold the addressing mode in the Z case. This would make Y die earlier. +bool AddressingModeMatcher:: +isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore, + ExtAddrMode &AMAfter) { + if (IgnoreProfitability) return true; + + // AMBefore is the addressing mode before this instruction was folded into it, + // and AMAfter is the addressing mode after the instruction was folded. Get + // the set of registers referenced by AMAfter and subtract out those + // referenced by AMBefore: this is the set of values which folding in this + // address extends the lifetime of. + // + // Note that there are only two potential values being referenced here, + // BaseReg and ScaleReg (global addresses are always available, as are any + // folded immediates). + Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg; + + // If the BaseReg or ScaledReg was referenced by the previous addrmode, their + // lifetime wasn't extended by adding this instruction. + if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + BaseReg = nullptr; + if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg)) + ScaledReg = nullptr; + + // If folding this instruction (and it's subexprs) didn't extend any live + // ranges, we're ok with it. + if (!BaseReg && !ScaledReg) + return true; + + // If all uses of this instruction are ultimately load/store/inlineasm's, + // check to see if their addressing modes will include this instruction. If + // so, we can fold it into all uses, so it doesn't matter if it has multiple + // uses. + SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses; + SmallPtrSet<Instruction*, 16> ConsideredInsts; + if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM)) + return false; // Has a non-memory, non-foldable use! + + // Now that we know that all uses of this instruction are part of a chain of + // computation involving only operations that could theoretically be folded + // into a memory use, loop over each of these uses and see if they could + // *actually* fold the instruction. + SmallVector<Instruction*, 32> MatchedAddrModeInsts; + for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) { + Instruction *User = MemoryUses[i].first; + unsigned OpNo = MemoryUses[i].second; + + // Get the access type of this use. If the use isn't a pointer, we don't + // know what it accesses. 
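+    // E.g. (illustrative): for "store i32 %v, i32* %p" the recorded operand
+    // number is 1, so Address is %p and AddressAccessTy is i32.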
+ Value *Address = User->getOperand(OpNo); + PointerType *AddrTy = dyn_cast<PointerType>(Address->getType()); + if (!AddrTy) + return false; + Type *AddressAccessTy = AddrTy->getElementType(); + unsigned AS = AddrTy->getAddressSpace(); + + // Do a match against the root of this address, ignoring profitability. This + // will tell us if the addressing mode for the memory operation will + // *actually* cover the shared instruction. + ExtAddrMode Result; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS, + MemoryInst, Result, InsertedInsts, + PromotedInsts, TPT); + Matcher.IgnoreProfitability = true; + bool Success = Matcher.matchAddr(Address, 0); + (void)Success; assert(Success && "Couldn't select *anything*?"); + + // The match was to check the profitability, the changes made are not + // part of the original matcher. Therefore, they should be dropped + // otherwise the original matcher will not present the right state. + TPT.rollback(LastKnownGood); + + // If the match didn't cover I, then it won't be shared by it. + if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(), + I) == MatchedAddrModeInsts.end()) + return false; + + MatchedAddrModeInsts.clear(); + } + + return true; +} + +} // end anonymous namespace + +/// Return true if the specified values are defined in a +/// different basic block than BB. +static bool IsNonLocalValue(Value *V, BasicBlock *BB) { + if (Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() != BB; + return false; +} + +/// Load and Store Instructions often have addressing modes that can do +/// significant amounts of computation. As such, instruction selection will try +/// to get the load or store to do as much computation as possible for the +/// program. The problem is that isel can only see within a single block. As +/// such, we sink as much legal addressing mode work into the block as possible. +/// +/// This method is used to optimize both load/store and inline asms with memory +/// operands. +bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, + Type *AccessTy, unsigned AddrSpace) { + Value *Repl = Addr; + + // Try to collapse single-value PHI nodes. This is necessary to undo + // unprofitable PRE transformations. + SmallVector<Value*, 8> worklist; + SmallPtrSet<Value*, 16> Visited; + worklist.push_back(Addr); + + // Use a worklist to iteratively look through PHI nodes, and ensure that + // the addressing mode obtained from the non-PHI roots of the graph + // are equivalent. + Value *Consensus = nullptr; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; + SmallVector<Instruction*, 16> AddrModeInsts; + ExtAddrMode AddrMode; + TypePromotionTransaction TPT; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + while (!worklist.empty()) { + Value *V = worklist.back(); + worklist.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(V).second) { + Consensus = nullptr; + break; + } + + // For a PHI node, push all of its incoming values. + if (PHINode *P = dyn_cast<PHINode>(V)) { + for (Value *IncValue : P->incoming_values()) + worklist.push_back(IncValue); + continue; + } + + // For non-PHIs, determine the addressing mode being computed. 
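+    // E.g. (illustrative): if Addr is a phi of two GEPs that both compute
+    // "%base + 4*%idx", each root yields the same ExtAddrMode, so one of them
+    // can be picked as the consensus below.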
+ SmallVector<Instruction*, 16> NewAddrModeInsts; + ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( + V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM, + InsertedInsts, PromotedInsts, TPT); + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { + Consensus = V; + NumUsesConsensus = NumUses; + AddrModeInsts = NewAddrModeInsts; + } + continue; + } + + Consensus = nullptr; + break; + } + + // If the addressing mode couldn't be determined, or if multiple different + // ones were determined, bail out now. + if (!Consensus) { + TPT.rollback(LastKnownGood); + return false; + } + TPT.commit(); + + // Check to see if any of the instructions supersumed by this addr mode are + // non-local to I's BB. + bool AnyNonLocal = false; + for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) { + if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) { + AnyNonLocal = true; + break; + } + } + + // If all the instructions matched are already in this BB, don't do anything. + if (!AnyNonLocal) { + DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"); + return false; + } + + // Insert this computation right after this user. Since our caller is + // scanning from the top of the BB to the bottom, reuse of the expr are + // guaranteed to happen later. + IRBuilder<> Builder(MemoryInst); + + // Now that we determined the addressing expression we want to use and know + // that we have to sink it into this block. Check to see if we have already + // done this for some other load/store instr in this block. If so, reuse the + // computation. + Value *&SunkAddr = SunkAddrs[Addr]; + if (SunkAddr) { + DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } else if (AddrSinkUsingGEPs || + (!AddrSinkUsingGEPs.getNumOccurrences() && TM && + TM->getSubtargetImpl(*MemoryInst->getParent()->getParent()) + ->useAA())) { + // By default, we use the GEP-based method when AA is used later. This + // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); + Value *ResultPtr = nullptr, *ResultIndex = nullptr; + + // First, find the pointer. 
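+    // E.g. (illustrative): an address of the form "%base + 4*%idx + 16" is
+    // rebuilt below as i8 GEPs off %base (one for the scaled index, one for
+    // the constant 16) and then bitcast back to the original pointer type.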
+ if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { + ResultPtr = AddrMode.BaseReg; + AddrMode.BaseReg = nullptr; + } + + if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { + // We can't add more than one pointer together, nor can we scale a + // pointer (both of which seem meaningless). + if (ResultPtr || AddrMode.Scale != 1) + return false; + + ResultPtr = AddrMode.ScaledReg; + AddrMode.Scale = 0; + } + + if (AddrMode.BaseGV) { + if (ResultPtr) + return false; + + ResultPtr = AddrMode.BaseGV; + } + + // If the real base value actually came from an inttoptr, then the matcher + // will look through it and provide only the integer value. In that case, + // use it here. + if (!ResultPtr && AddrMode.BaseReg) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr"); + AddrMode.BaseReg = nullptr; + } else if (!ResultPtr && AddrMode.Scale == 1) { + ResultPtr = + Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr"); + AddrMode.Scale = 0; + } + + if (!ResultPtr && + !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { + SunkAddr = Constant::getNullValue(Addr->getType()); + } else if (!ResultPtr) { + return false; + } else { + Type *I8PtrTy = + Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); + Type *I8Ty = Builder.getInt8Ty(); + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + + ResultIndex = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex); + if (I && (ResultIndex != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (ResultIndex) + ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); + else + ResultIndex = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (ResultIndex) { + // We need to add this separately from the scale above to help with + // SDAG consecutive load/store merging. 
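+        // (Illustrative note: emitting the scaled index as its own GEP first
+        // keeps the constant offset in the outermost GEP, so accesses that
+        // differ only by that constant are easier for SDAG to merge.)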
+ if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + } + + ResultIndex = V; + } + + if (!ResultIndex) { + SunkAddr = ResultPtr; + } else { + if (ResultPtr->getType() != I8PtrTy) + ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy); + SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); + } + + if (SunkAddr->getType() != Addr->getType()) + SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType()); + } + } else { + DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " + << *MemoryInst << "\n"); + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); + Value *Result = nullptr; + + // Start with the base register. Do this first so that subsequent address + // matching finds it last, which will prevent it from trying to match it + // as the scaled value in case it happens to be a mul. That would be + // problematic if we've sunk a different mul for the scale, because then + // we'd end up sinking both muls. + if (AddrMode.BaseReg) { + Value *V = AddrMode.BaseReg; + if (V->getType()->isPointerTy()) + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + if (V->getType() != IntPtrTy) + V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); + Result = V; + } + + // Add the scale value. + if (AddrMode.Scale) { + Value *V = AddrMode.ScaledReg; + if (V->getType() == IntPtrTy) { + // done. + } else if (V->getType()->isPointerTy()) { + V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); + } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < + cast<IntegerType>(V->getType())->getBitWidth()) { + V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); + } else { + // It is only safe to sign extend the BaseReg if we know that the math + // required to create it did not overflow before we extend it. Since + // the original IR value was tossed in favor of a constant back when + // the AddrMode was created we need to bail out gracefully if widths + // do not match instead of extending it. + Instruction *I = dyn_cast_or_null<Instruction>(Result); + if (I && (Result != AddrMode.BaseReg)) + I->eraseFromParent(); + return false; + } + if (AddrMode.Scale != 1) + V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), + "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the BaseGV if present. + if (AddrMode.BaseGV) { + Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + // Add in the Base Offset if present. + if (AddrMode.BaseOffs) { + Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); + if (Result) + Result = Builder.CreateAdd(Result, V, "sunkaddr"); + else + Result = V; + } + + if (!Result) + SunkAddr = Constant::getNullValue(Addr->getType()); + else + SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); + } + + MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. 
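+    // (A WeakVH nulls itself out if the instruction it tracks is deleted, so
+    // the comparison below detects whether CurInstIterator was invalidated by
+    // the recursive deletion.)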
+ WeakVH IterHandle(&*CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + + RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); + + if (IterHandle != CurInstIterator.getNodePtrUnchecked()) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } + } + ++NumMemoryInsts; + return true; +} + +/// If there are any memory operands, use OptimizeMemoryInst to sink their +/// address computing into the block when possible / profitable. +bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { + bool MadeChange = false; + + const TargetRegisterInfo *TRI = + TM->getSubtargetImpl(*CS->getParent()->getParent())->getRegisterInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = + TLI->ParseConstraints(*DL, TRI, CS); + unsigned ArgNo = 0; + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI->ComputeConstraintToUse(OpInfo, SDValue()); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.isIndirect) { + Value *OpVal = CS->getArgOperand(ArgNo++); + MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); + } else if (OpInfo.Type == InlineAsm::isInput) + ArgNo++; + } + + return MadeChange; +} + +/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or +/// sign extensions. +static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) { + assert(!Inst->use_empty() && "Input must have at least one use"); + const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin()); + bool IsSExt = isa<SExtInst>(FirstUser); + Type *ExtTy = FirstUser->getType(); + for (const User *U : Inst->users()) { + const Instruction *UI = cast<Instruction>(U); + if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) + return false; + Type *CurTy = UI->getType(); + // Same input and output types: Same instruction after CSE. + if (CurTy == ExtTy) + continue; + + // If IsSExt is true, we are in this situation: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty1 a to ty3 + // Assuming ty2 is shorter than ty3, this could be turned into: + // a = Inst + // b = sext ty1 a to ty2 + // c = sext ty2 b to ty3 + // However, the last sext is not free. + if (IsSExt) + return false; + + // This is a ZExt, maybe this is free to extend from one type to another. + // In that case, we would not account for a different use. + Type *NarrowTy; + Type *LargeTy; + if (ExtTy->getScalarType()->getIntegerBitWidth() > + CurTy->getScalarType()->getIntegerBitWidth()) { + NarrowTy = CurTy; + LargeTy = ExtTy; + } else { + NarrowTy = ExtTy; + LargeTy = CurTy; + } + + if (!TLI.isZExtFree(NarrowTy, LargeTy)) + return false; + } + // All uses are the same or can be derived from one another for free. + return true; +} + +/// \brief Try to form ExtLd by promoting \p Exts until they reach a +/// load instruction. +/// If an ext(load) can be formed, it is returned via \p LI for the load +/// and \p Inst for the extension. +/// Otherwise LI == nullptr and Inst == nullptr. +/// When some promotion happened, \p TPT contains the proper state to +/// revert them. +/// +/// \return true when promoting was necessary to expose the ext(load) +/// opportunity, false otherwise. 
+///
+/// Example:
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Thanks to the promotion, we can match zext(load i32*) to i64.
+bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
+                                    LoadInst *&LI, Instruction *&Inst,
+                                    const SmallVectorImpl<Instruction *> &Exts,
+                                    unsigned CreatedInstsCost = 0) {
+  // Iterate over all the extensions to see if one of them forms an ext(load).
+  for (auto I : Exts) {
+    // Check if we directly have ext(load).
+    if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
+      Inst = I;
+      // No promotion happened here.
+      return false;
+    }
+    // Check whether or not we want to do any promotion.
+    if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
+      continue;
+    // Get the action to perform the promotion.
+    TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
+        I, InsertedInsts, *TLI, PromotedInsts);
+    // Check if we can promote.
+    if (!TPH)
+      continue;
+    // Save the current state.
+    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+        TPT.getRestorationPoint();
+    SmallVector<Instruction *, 4> NewExts;
+    unsigned NewCreatedInstsCost = 0;
+    unsigned ExtCost = !TLI->isExtFree(I);
+    // Promote.
+    Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
+                             &NewExts, nullptr, *TLI);
+    assert(PromotedVal &&
+           "TypePromotionHelper should have filtered out those cases");
+
+    // We can merge only one extension into a load.
+    // Therefore, if we have more than 1 new extension we heuristically
+    // cut this search path, because it means we degrade the code quality.
+    // With exactly 2, the transformation is neutral, because we will merge
+    // one extension but leave one. However, we optimistically keep going,
+    // because the new extension may be removed too.
+    long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
+    TotalCreatedInstsCost -= ExtCost;
+    if (!StressExtLdPromotion &&
+        (TotalCreatedInstsCost > 1 ||
+         !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
+      // The promotion is not profitable, rollback to the previous state.
+      TPT.rollback(LastKnownGood);
+      continue;
+    }
+    // The promotion is profitable.
+    // Check if it exposes an ext(load).
+    (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
+    if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+               // If we have created a new extension, i.e., now we have two
+               // extensions, we must make sure one of them is merged with
+               // the load; otherwise we may degrade the code quality.
+               (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
+      // Promotion happened.
+      return true;
+    // If this does not help to expose an ext(load), then rollback.
+    TPT.rollback(LastKnownGood);
+  }
+  // None of the extensions can form an ext(load).
+  LI = nullptr;
+  Inst = nullptr;
+  return false;
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+/// \p I[in/out] the extension may be modified during the process if some
+/// promotions apply.
+///
+bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
+  // Try to promote a chain of computation if it allows us to form
+  // an extended load.
+ TypePromotionTransaction TPT; + TypePromotionTransaction::ConstRestorationPt LastKnownGood = + TPT.getRestorationPoint(); + SmallVector<Instruction *, 1> Exts; + Exts.push_back(I); + // Look for a load being extended. + LoadInst *LI = nullptr; + Instruction *OldExt = I; + bool HasPromoted = extLdPromotion(TPT, LI, I, Exts); + if (!LI || !I) { + assert(!HasPromoted && !LI && "If we did not match any load instruction " + "the code must remain the same"); + I = OldExt; + return false; + } + + // If they're already in the same block, there's nothing to do. + // Make the cheap checks first if we did not promote. + // If we promoted, we need to check if it is indeed profitable. + if (!HasPromoted && LI->getParent() == I->getParent()) + return false; + + EVT VT = TLI->getValueType(*DL, I->getType()); + EVT LoadVT = TLI->getValueType(*DL, LI->getType()); + + // If the load has other users and the truncate is not free, this probably + // isn't worthwhile. + if (!LI->hasOneUse() && TLI && + (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) && + !TLI->isTruncateFree(I->getType(), LI->getType())) { + I = OldExt; + TPT.rollback(LastKnownGood); + return false; + } + + // Check whether the target supports casts folded into loads. + unsigned LType; + if (isa<ZExtInst>(I)) + LType = ISD::ZEXTLOAD; + else { + assert(isa<SExtInst>(I) && "Unexpected ext type!"); + LType = ISD::SEXTLOAD; + } + if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) { + I = OldExt; + TPT.rollback(LastKnownGood); + return false; + } + + // Move the extend into the same block as the load, so that SelectionDAG + // can fold it. + TPT.commit(); + I->removeFromParent(); + I->insertAfter(LI); + ++NumExtsMoved; + return true; +} + +bool CodeGenPrepare::optimizeExtUses(Instruction *I) { + BasicBlock *DefBB = I->getParent(); + + // If the result of a {s|z}ext and its source are both live out, rewrite all + // other uses of the source with result of extension. + Value *Src = I->getOperand(0); + if (Src->hasOneUse()) + return false; + + // Only do this xform if truncating is free. + if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) + return false; + + // Only safe to perform the optimization if the source is also defined in + // this block. + if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) + return false; + + bool DefIsLiveOut = false; + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + DefIsLiveOut = true; + break; + } + if (!DefIsLiveOut) + return false; + + // Make sure none of the uses are PHI nodes. + for (User *U : Src->users()) { + Instruction *UI = cast<Instruction>(U); + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + // Be conservative. We don't want this xform to end up introducing + // reloads just before load / store instructions. + if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) + return false; + } + + // InsertedTruncs - Only insert one trunc in each block once. + DenseMap<BasicBlock*, Instruction*> InsertedTruncs; + + bool MadeChange = false; + for (Use &U : Src->uses()) { + Instruction *User = cast<Instruction>(U.getUser()); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = User->getParent(); + if (UserBB == DefBB) continue; + + // Both src and def are live in this block. Rewrite the use. 
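+    // E.g. (illustrative): if %s and "%e = sext i32 %s to i64" both live in
+    // DefBB and %s is also used in UserBB, that use is rewritten to
+    // "trunc i64 %e to i32" so that only %e needs to stay live out of DefBB.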
+ Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; + + if (!InsertedTrunc) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); + InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); + InsertedInsts.insert(InsertedTrunc); + } + + // Replace a use of the {s|z}ext source with a use of the result. + U = InsertedTrunc; + ++NumExtUses; + MadeChange = true; + } + + return MadeChange; +} + +// Find loads whose uses only use some of the loaded value's bits. Add an "and" +// just after the load if the target can fold this into one extload instruction, +// with the hope of eliminating some of the other later "and" instructions using +// the loaded value. "and"s that are made trivially redundant by the insertion +// of the new "and" are removed by this function, while others (e.g. those whose +// path from the load goes through a phi) are left for isel to potentially +// remove. +// +// For example: +// +// b0: +// x = load i32 +// ... +// b1: +// y = and x, 0xff +// z = use y +// +// becomes: +// +// b0: +// x = load i32 +// x' = and x, 0xff +// ... +// b1: +// z = use x' +// +// whereas: +// +// b0: +// x1 = load i32 +// ... +// b1: +// x2 = load i32 +// ... +// b2: +// x = phi x1, x2 +// y = and x, 0xff +// +// becomes (after a call to optimizeLoadExt for each load): +// +// b0: +// x1 = load i32 +// x1' = and x1, 0xff +// ... +// b1: +// x2 = load i32 +// x2' = and x2, 0xff +// ... +// b2: +// x = phi x1', x2' +// y = and x, 0xff +// + +bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { + + if (!Load->isSimple() || + !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy())) + return false; + + // Skip loads we've already transformed or have no reason to transform. + if (Load->hasOneUse()) { + User *LoadUser = *Load->user_begin(); + if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() && + !dyn_cast<PHINode>(LoadUser)) + return false; + } + + // Look at all uses of Load, looking through phis, to determine how many bits + // of the loaded value are needed. + SmallVector<Instruction *, 8> WorkList; + SmallPtrSet<Instruction *, 16> Visited; + SmallVector<Instruction *, 8> AndsToMaybeRemove; + for (auto *U : Load->users()) + WorkList.push_back(cast<Instruction>(U)); + + EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); + unsigned BitWidth = LoadResultVT.getSizeInBits(); + APInt DemandBits(BitWidth, 0); + APInt WidestAndBits(BitWidth, 0); + + while (!WorkList.empty()) { + Instruction *I = WorkList.back(); + WorkList.pop_back(); + + // Break use-def graph loops. + if (!Visited.insert(I).second) + continue; + + // For a PHI node, push all of its users. + if (auto *Phi = dyn_cast<PHINode>(I)) { + for (auto *U : Phi->users()) + WorkList.push_back(cast<Instruction>(U)); + continue; + } + + switch (I->getOpcode()) { + case llvm::Instruction::And: { + auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!AndC) + return false; + APInt AndBits = AndC->getValue(); + DemandBits |= AndBits; + // Keep track of the widest and mask we see. 
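+      // E.g. (illustrative): masks 0xff and 0xffff from two users give
+      // DemandBits == 0xffff, and only "and"s using that widest mask are
+      // candidates for removal below.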
+ if (AndBits.ugt(WidestAndBits)) + WidestAndBits = AndBits; + if (AndBits == WidestAndBits && I->getOperand(0) == Load) + AndsToMaybeRemove.push_back(I); + break; + } + + case llvm::Instruction::Shl: { + auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); + if (!ShlC) + return false; + uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); + auto ShlDemandBits = APInt::getAllOnesValue(BitWidth).lshr(ShiftAmt); + DemandBits |= ShlDemandBits; + break; + } + + case llvm::Instruction::Trunc: { + EVT TruncVT = TLI->getValueType(*DL, I->getType()); + unsigned TruncBitWidth = TruncVT.getSizeInBits(); + auto TruncBits = APInt::getAllOnesValue(TruncBitWidth).zext(BitWidth); + DemandBits |= TruncBits; + break; + } + + default: + return false; + } + } + + uint32_t ActiveBits = DemandBits.getActiveBits(); + // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the + // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, + // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but + // (and (load x) 1) is not matched as a single instruction, rather as a LDR + // followed by an AND. + // TODO: Look into removing this restriction by fixing backends to either + // return false for isLoadExtLegal for i1 or have them select this pattern to + // a single instruction. + // + // Also avoid hoisting if we didn't see any ands with the exact DemandBits + // mask, since these are the only ands that will be removed by isel. + if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) || + WidestAndBits != DemandBits) + return false; + + LLVMContext &Ctx = Load->getType()->getContext(); + Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); + EVT TruncVT = TLI->getValueType(*DL, TruncTy); + + // Reject cases that won't be matched as extloads. + if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || + !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) + return false; + + IRBuilder<> Builder(Load->getNextNode()); + auto *NewAnd = dyn_cast<Instruction>( + Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); + + // Replace all uses of load with new and (except for the use of load in the + // new and itself). + Load->replaceAllUsesWith(NewAnd); + NewAnd->setOperand(0, Load); + + // Remove any and instructions that are now redundant. + for (auto *And : AndsToMaybeRemove) + // Check that the and mask is the same as the one we decided to put on the + // new and. + if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { + And->replaceAllUsesWith(NewAnd); + if (&*CurInstIterator == And) + CurInstIterator = std::next(And->getIterator()); + And->eraseFromParent(); + ++NumAndUses; + } + + ++NumAndsAdded; + return true; +} + +/// Check if V (an operand of a select instruction) is an expensive instruction +/// that is only used once. +static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { + auto *I = dyn_cast<Instruction>(V); + // If it's safe to speculatively execute, then it should not have side + // effects; therefore, it's safe to sink and possibly *not* execute. + return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && + TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; +} + +/// Returns true if a SelectInst should be turned into an explicit branch. +static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, + SelectInst *SI) { + // FIXME: This should use the same heuristics as IfConversion to determine + // whether a select is better represented as a branch. 
This requires that + // branch probability metadata is preserved for the select, which is not the + // case currently. + + CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); + + // If a branch is predictable, an out-of-order CPU can avoid blocking on its + // comparison condition. If the compare has more than one use, there's + // probably another cmov or setcc around, so it's not worth emitting a branch. + if (!Cmp || !Cmp->hasOneUse()) + return false; + + Value *CmpOp0 = Cmp->getOperand(0); + Value *CmpOp1 = Cmp->getOperand(1); + + // Emit "cmov on compare with a memory operand" as a branch to avoid stalls + // on a load from memory. But if the load is used more than once, do not + // change the select to a branch because the load is probably needed + // regardless of whether the branch is taken or not. + if ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) || + (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse())) + return true; + + // If either operand of the select is expensive and only needed on one side + // of the select, we should form a branch. + if (sinkSelectOperand(TTI, SI->getTrueValue()) || + sinkSelectOperand(TTI, SI->getFalseValue())) + return true; + + return false; +} + + +/// If we have a SelectInst that will likely profit from branch prediction, +/// turn it into a branch. +bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { + bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); + + // Can we convert the 'select' to CF ? + if (DisableSelectToBranch || OptSize || !TLI || VectorCond) + return false; + + TargetLowering::SelectSupportKind SelectKind; + if (VectorCond) + SelectKind = TargetLowering::VectorMaskSelect; + else if (SI->getType()->isVectorTy()) + SelectKind = TargetLowering::ScalarCondVectorVal; + else + SelectKind = TargetLowering::ScalarValSelect; + + // Do we have efficient codegen support for this kind of 'selects' ? + if (TLI->isSelectSupported(SelectKind)) { + // We have efficient codegen support for the select instruction. + // Check if it is profitable to keep this 'select'. + if (!TLI->isPredictableSelectExpensive() || + !isFormingBranchFromSelectProfitable(TTI, SI)) + return false; + } + + ModifiedDT = true; + + // Transform a sequence like this: + // start: + // %cmp = cmp uge i32 %a, %b + // %sel = select i1 %cmp, i32 %c, i32 %d + // + // Into: + // start: + // %cmp = cmp uge i32 %a, %b + // br i1 %cmp, label %select.true, label %select.false + // select.true: + // br label %select.end + // select.false: + // br label %select.end + // select.end: + // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] + // + // In addition, we may sink instructions that produce %c or %d from + // the entry block into the destination(s) of the new branch. + // If the true or false blocks do not contain a sunken instruction, that + // block and its branch may be optimized away. In that case, one side of the + // first branch will point directly to select.end, and the corresponding PHI + // predecessor block will be the start block. + + // First, we split the block containing the select into 2 blocks. + BasicBlock *StartBlock = SI->getParent(); + BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI)); + BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); + + // Delete the unconditional branch that was just created by the split. + StartBlock->getTerminator()->eraseFromParent(); + + // These are the new basic blocks for the conditional branch. + // At least one will become an actual new basic block. 
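+  // E.g. (illustrative): if the true value above is an expensive, single-use
+  // instruction that is safe to speculate (see sinkSelectOperand), it is moved
+  // into select.true.sink so it only executes when that side is chosen.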
+ BasicBlock *TrueBlock = nullptr; + BasicBlock *FalseBlock = nullptr; + + // Sink expensive instructions into the conditional blocks to avoid executing + // them speculatively. + if (sinkSelectOperand(TTI, SI->getTrueValue())) { + TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", + EndBlock->getParent(), EndBlock); + auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock); + auto *TrueInst = cast<Instruction>(SI->getTrueValue()); + TrueInst->moveBefore(TrueBranch); + } + if (sinkSelectOperand(TTI, SI->getFalseValue())) { + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", + EndBlock->getParent(), EndBlock); + auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); + auto *FalseInst = cast<Instruction>(SI->getFalseValue()); + FalseInst->moveBefore(FalseBranch); + } + + // If there was nothing to sink, then arbitrarily choose the 'false' side + // for a new input value to the PHI. + if (TrueBlock == FalseBlock) { + assert(TrueBlock == nullptr && + "Unexpected basic block transform while optimizing select"); + + FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", + EndBlock->getParent(), EndBlock); + BranchInst::Create(EndBlock, FalseBlock); + } + + // Insert the real conditional branch based on the original condition. + // If we did not create a new block for one of the 'true' or 'false' paths + // of the condition, it means that side of the branch goes to the end block + // directly and the path originates from the start block from the point of + // view of the new PHI. + if (TrueBlock == nullptr) { + BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI); + TrueBlock = StartBlock; + } else if (FalseBlock == nullptr) { + BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI); + FalseBlock = StartBlock; + } else { + BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI); + } + + // The select itself is replaced with a PHI Node. + PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); + PN->takeName(SI); + PN->addIncoming(SI->getTrueValue(), TrueBlock); + PN->addIncoming(SI->getFalseValue(), FalseBlock); + + SI->replaceAllUsesWith(PN); + SI->eraseFromParent(); + + // Instruct OptimizeBlock to skip to the next block. + CurInstIterator = StartBlock->end(); + ++NumSelectsExpanded; + return true; +} + +static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { + SmallVector<int, 16> Mask(SVI->getShuffleMask()); + int SplatElem = -1; + for (unsigned i = 0; i < Mask.size(); ++i) { + if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) + return false; + SplatElem = Mask[i]; + } + + return true; +} + +/// Some targets have expensive vector shifts if the lanes aren't all the same +/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases +/// it's often worth sinking a shufflevector splat down to its use so that +/// codegen can spot all lanes are identical. +bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { + BasicBlock *DefBB = SVI->getParent(); + + // Only do this xform if variable vector shifts are particularly expensive. + if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) + return false; + + // We only expect better codegen by sinking a shuffle if we can recognise a + // constant splat. + if (!isBroadcastShuffle(SVI)) + return false; + + // InsertedShuffles - Only insert a shuffle in each block once. 
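+  // E.g. (illustrative): for a splat such as
+  //   %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
+  // used by a shift in another block, a copy of the shuffle is rebuilt next to
+  // that shift so codegen can see that all shift amounts are identical.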
+ DenseMap<BasicBlock*, Instruction*> InsertedShuffles; + + bool MadeChange = false; + for (User *U : SVI->users()) { + Instruction *UI = cast<Instruction>(U); + + // Figure out which BB this ext is used in. + BasicBlock *UserBB = UI->getParent(); + if (UserBB == DefBB) continue; + + // For now only apply this when the splat is used by a shift instruction. + if (!UI->isShift()) continue; + + // Everything checks out, sink the shuffle if the user's block doesn't + // already have a copy. + Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; + + if (!InsertedShuffle) { + BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); + assert(InsertPt != UserBB->end()); + InsertedShuffle = + new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), + SVI->getOperand(2), "", &*InsertPt); + } + + UI->replaceUsesOfWith(SVI, InsertedShuffle); + MadeChange = true; + } + + // If we removed all uses, nuke the shuffle. + if (SVI->use_empty()) { + SVI->eraseFromParent(); + MadeChange = true; + } + + return MadeChange; +} + +bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { + if (!TLI || !DL) + return false; + + Value *Cond = SI->getCondition(); + Type *OldType = Cond->getType(); + LLVMContext &Context = Cond->getContext(); + MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); + unsigned RegWidth = RegType.getSizeInBits(); + + if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) + return false; + + // If the register width is greater than the type width, expand the condition + // of the switch instruction and each case constant to the width of the + // register. By widening the type of the switch condition, subsequent + // comparisons (for case comparisons) will not need to be extended to the + // preferred register width, so we will potentially eliminate N-1 extends, + // where N is the number of cases in the switch. + auto *NewType = Type::getIntNTy(Context, RegWidth); + + // Zero-extend the switch condition and case constants unless the switch + // condition is a function argument that is already being sign-extended. + // In that case, we can avoid an unnecessary mask/extension by sign-extending + // everything instead. + Instruction::CastOps ExtType = Instruction::ZExt; + if (auto *Arg = dyn_cast<Argument>(Cond)) + if (Arg->hasSExtAttr()) + ExtType = Instruction::SExt; + + auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); + ExtInst->insertBefore(SI); + SI->setCondition(ExtInst); + for (SwitchInst::CaseIt Case : SI->cases()) { + APInt NarrowConst = Case.getCaseValue()->getValue(); + APInt WideConst = (ExtType == Instruction::ZExt) ? + NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); + Case.setValue(ConstantInt::get(Context, WideConst)); + } + + return true; +} + +namespace { +/// \brief Helper class to promote a scalar operation to a vector one. +/// This class is used to move downward extractelement transition. +/// E.g., +/// a = vector_op <2 x i32> +/// b = extractelement <2 x i32> a, i32 0 +/// c = scalar_op b +/// store c +/// +/// => +/// a = vector_op <2 x i32> +/// c = vector_op a (equivalent to scalar_op on the related lane) +/// * d = extractelement <2 x i32> c, i32 0 +/// * store d +/// Assuming both extractelement and store can be combine, we get rid of the +/// transition. +class VectorPromoteHelper { + /// DataLayout associated with the current module. + const DataLayout &DL; + + /// Used to perform some checks on the legality of vector operations. 
+ const TargetLowering &TLI; + + /// Used to estimated the cost of the promoted chain. + const TargetTransformInfo &TTI; + + /// The transition being moved downwards. + Instruction *Transition; + /// The sequence of instructions to be promoted. + SmallVector<Instruction *, 4> InstsToBePromoted; + /// Cost of combining a store and an extract. + unsigned StoreExtractCombineCost; + /// Instruction that will be combined with the transition. + Instruction *CombineInst; + + /// \brief The instruction that represents the current end of the transition. + /// Since we are faking the promotion until we reach the end of the chain + /// of computation, we need a way to get the current end of the transition. + Instruction *getEndOfTransition() const { + if (InstsToBePromoted.empty()) + return Transition; + return InstsToBePromoted.back(); + } + + /// \brief Return the index of the original value in the transition. + /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, + /// c, is at index 0. + unsigned getTransitionOriginalValueIdx() const { + assert(isa<ExtractElementInst>(Transition) && + "Other kind of transitions are not supported yet"); + return 0; + } + + /// \brief Return the index of the index in the transition. + /// E.g., for "extractelement <2 x i32> c, i32 0" the index + /// is at index 1. + unsigned getTransitionIdx() const { + assert(isa<ExtractElementInst>(Transition) && + "Other kind of transitions are not supported yet"); + return 1; + } + + /// \brief Get the type of the transition. + /// This is the type of the original value. + /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the + /// transition is <2 x i32>. + Type *getTransitionType() const { + return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); + } + + /// \brief Promote \p ToBePromoted by moving \p Def downward through. + /// I.e., we have the following sequence: + /// Def = Transition <ty1> a to <ty2> + /// b = ToBePromoted <ty2> Def, ... + /// => + /// b = ToBePromoted <ty1> a, ... + /// Def = Transition <ty1> ToBePromoted to <ty2> + void promoteImpl(Instruction *ToBePromoted); + + /// \brief Check whether or not it is profitable to promote all the + /// instructions enqueued to be promoted. + bool isProfitableToPromote() { + Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); + unsigned Index = isa<ConstantInt>(ValIdx) + ? cast<ConstantInt>(ValIdx)->getZExtValue() + : -1; + Type *PromotedType = getTransitionType(); + + StoreInst *ST = cast<StoreInst>(CombineInst); + unsigned AS = ST->getPointerAddressSpace(); + unsigned Align = ST->getAlignment(); + // Check if this store is supported. + if (!TLI.allowsMisalignedMemoryAccesses( + TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, + Align)) { + // If this is not supported, there is no way we can combine + // the extract with the store. + return false; + } + + // The scalar chain of computation has to pay for the transition + // scalar to vector. + // The vector chain has to account for the combining cost. + uint64_t ScalarCost = + TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); + uint64_t VectorCost = StoreExtractCombineCost; + for (const auto &Inst : InstsToBePromoted) { + // Compute the cost. + // By construction, all instructions being promoted are arithmetic ones. + // Moreover, one argument is a constant that can be viewed as a splat + // constant. 
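+      // E.g. (illustrative): for "add i32 %b, 7" the constant 7 is classified
+      // as OK_UniformConstantValue on both the scalar and the vector side of
+      // the cost comparison below.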
+ Value *Arg0 = Inst->getOperand(0); + bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || + isa<ConstantFP>(Arg0); + TargetTransformInfo::OperandValueKind Arg0OVK = + IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue + : TargetTransformInfo::OK_AnyValue; + TargetTransformInfo::OperandValueKind Arg1OVK = + !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue + : TargetTransformInfo::OK_AnyValue; + ScalarCost += TTI.getArithmeticInstrCost( + Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); + VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, + Arg0OVK, Arg1OVK); + } + DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: " + << ScalarCost << "\nVector: " << VectorCost << '\n'); + return ScalarCost > VectorCost; + } + + /// \brief Generate a constant vector with \p Val with the same + /// number of elements as the transition. + /// \p UseSplat defines whether or not \p Val should be replicated + /// across the whole vector. + /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, + /// otherwise we generate a vector with as many undef as possible: + /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only + /// used at the index of the extract. + Value *getConstantVector(Constant *Val, bool UseSplat) const { + unsigned ExtractIdx = UINT_MAX; + if (!UseSplat) { + // If we cannot determine where the constant must be, we have to + // use a splat constant. + Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); + if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) + ExtractIdx = CstVal->getSExtValue(); + else + UseSplat = true; + } + + unsigned End = getTransitionType()->getVectorNumElements(); + if (UseSplat) + return ConstantVector::getSplat(End, Val); + + SmallVector<Constant *, 4> ConstVec; + UndefValue *UndefVal = UndefValue::get(Val->getType()); + for (unsigned Idx = 0; Idx != End; ++Idx) { + if (Idx == ExtractIdx) + ConstVec.push_back(Val); + else + ConstVec.push_back(UndefVal); + } + return ConstantVector::get(ConstVec); + } + + /// \brief Check if promoting to a vector type an operand at \p OperandIdx + /// in \p Use can trigger undefined behavior. + static bool canCauseUndefinedBehavior(const Instruction *Use, + unsigned OperandIdx) { + // This is not safe to introduce undef when the operand is on + // the right hand side of a division-like instruction. + if (OperandIdx != 1) + return false; + switch (Use->getOpcode()) { + default: + return false; + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + return true; + case Instruction::FDiv: + case Instruction::FRem: + return !Use->hasNoNaNs(); + } + llvm_unreachable(nullptr); + } + +public: + VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI, + const TargetTransformInfo &TTI, Instruction *Transition, + unsigned CombineCost) + : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition), + StoreExtractCombineCost(CombineCost), CombineInst(nullptr) { + assert(Transition && "Do not know how to promote null"); + } + + /// \brief Check if we can promote \p ToBePromoted to \p Type. + bool canPromote(const Instruction *ToBePromoted) const { + // We could support CastInst too. + return isa<BinaryOperator>(ToBePromoted); + } + + /// \brief Check if it is profitable to promote \p ToBePromoted + /// by moving downward the transition through. 
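+  /// For illustration (hypothetical IR): with the transition
+  ///   %b = extractelement <2 x i32> %a, i32 0
+  /// an instruction such as "%c = mul i32 %b, 7" can be promoted, because
+  /// its other operand is a constant, whereas "%c = udiv i32 7, %b" cannot:
+  /// promotion would place potentially undef lanes in the divisor.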
+ bool shouldPromote(const Instruction *ToBePromoted) const { + // Promote only if all the operands can be statically expanded. + // Indeed, we do not want to introduce any new kind of transitions. + for (const Use &U : ToBePromoted->operands()) { + const Value *Val = U.get(); + if (Val == getEndOfTransition()) { + // If the use is a division and the transition is on the rhs, + // we cannot promote the operation, otherwise we may create a + // division by zero. + if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) + return false; + continue; + } + if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && + !isa<ConstantFP>(Val)) + return false; + } + // Check that the resulting operation is legal. + int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); + if (!ISDOpcode) + return false; + return StressStoreExtract || + TLI.isOperationLegalOrCustom( + ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); + } + + /// \brief Check whether or not \p Use can be combined + /// with the transition. + /// I.e., is it possible to do Use(Transition) => AnotherUse? + bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } + + /// \brief Record \p ToBePromoted as part of the chain to be promoted. + void enqueueForPromotion(Instruction *ToBePromoted) { + InstsToBePromoted.push_back(ToBePromoted); + } + + /// \brief Set the instruction that will be combined with the transition. + void recordCombineInstruction(Instruction *ToBeCombined) { + assert(canCombine(ToBeCombined) && "Unsupported instruction to combine"); + CombineInst = ToBeCombined; + } + + /// \brief Promote all the instructions enqueued for promotion if it is + /// is profitable. + /// \return True if the promotion happened, false otherwise. + bool promote() { + // Check if there is something to promote. + // Right now, if we do not have anything to combine with, + // we assume the promotion is not profitable. + if (InstsToBePromoted.empty() || !CombineInst) + return false; + + // Check cost. + if (!StressStoreExtract && !isProfitableToPromote()) + return false; + + // Promote. + for (auto &ToBePromoted : InstsToBePromoted) + promoteImpl(ToBePromoted); + InstsToBePromoted.clear(); + return true; + } +}; +} // End of anonymous namespace. + +void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { + // At this point, we know that all the operands of ToBePromoted but Def + // can be statically promoted. + // For Def, we need to use its parameter in ToBePromoted: + // b = ToBePromoted ty1 a + // Def = Transition ty1 b to ty2 + // Move the transition down. + // 1. Replace all uses of the promoted operation by the transition. + // = ... b => = ... Def. + assert(ToBePromoted->getType() == Transition->getType() && + "The type of the result of the transition does not match " + "the final type"); + ToBePromoted->replaceAllUsesWith(Transition); + // 2. Update the type of the uses. + // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. + Type *TransitionTy = getTransitionType(); + ToBePromoted->mutateType(TransitionTy); + // 3. Update all the operands of the promoted operation with promoted + // operands. + // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. + for (Use &U : ToBePromoted->operands()) { + Value *Val = U.get(); + Value *NewVal = nullptr; + if (Val == Transition) + NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); + else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || + isa<ConstantFP>(Val)) { + // Use a splat constant if it is not safe to use undef. 
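+      // For example (hypothetical values): when promoting "mul i32 %b, 7"
+      // where the transition extracts lane 0 of a <2 x i32>, the constant is
+      // rebuilt as <i32 7, i32 undef>; if an undef lane could trigger UB,
+      // e.g. when the constant is the divisor of a udiv, the splat
+      // <i32 7, i32 7> is used instead.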
+ NewVal = getConstantVector( + cast<Constant>(Val), + isa<UndefValue>(Val) || + canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); + } else + llvm_unreachable("Did you modified shouldPromote and forgot to update " + "this?"); + ToBePromoted->setOperand(U.getOperandNo(), NewVal); + } + Transition->removeFromParent(); + Transition->insertAfter(ToBePromoted); + Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); +} + +/// Some targets can do store(extractelement) with one instruction. +/// Try to push the extractelement towards the stores when the target +/// has this feature and this is profitable. +bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { + unsigned CombineCost = UINT_MAX; + if (DisableStoreExtract || !TLI || + (!StressStoreExtract && + !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), + Inst->getOperand(1), CombineCost))) + return false; + + // At this point we know that Inst is a vector to scalar transition. + // Try to move it down the def-use chain, until: + // - We can combine the transition with its single use + // => we got rid of the transition. + // - We escape the current basic block + // => we would need to check that we are moving it at a cheaper place and + // we do not do that for now. + BasicBlock *Parent = Inst->getParent(); + DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n'); + VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); + // If the transition has more than one use, assume this is not going to be + // beneficial. + while (Inst->hasOneUse()) { + Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); + DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n'); + + if (ToBePromoted->getParent() != Parent) { + DEBUG(dbgs() << "Instruction to promote is in a different block (" + << ToBePromoted->getParent()->getName() + << ") than the transition (" << Parent->getName() << ").\n"); + return false; + } + + if (VPH.canCombine(ToBePromoted)) { + DEBUG(dbgs() << "Assume " << *Inst << '\n' + << "will be combined with: " << *ToBePromoted << '\n'); + VPH.recordCombineInstruction(ToBePromoted); + bool Changed = VPH.promote(); + NumStoreExtractExposed += Changed; + return Changed; + } + + DEBUG(dbgs() << "Try promoting.\n"); + if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) + return false; + + DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n"); + + VPH.enqueueForPromotion(ToBePromoted); + Inst = ToBePromoted; + } + return false; +} + +bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { + // Bail out if we inserted the instruction to prevent optimizations from + // stepping on each other's toes. + if (InsertedInsts.count(I)) + return false; + + if (PHINode *P = dyn_cast<PHINode>(I)) { + // It is possible for very late stage optimizations (such as SimplifyCFG) + // to introduce PHI nodes too late to be cleaned up. If we detect such a + // trivial PHI, go ahead and zap it here. + if (Value *V = SimplifyInstruction(P, *DL, TLInfo, nullptr)) { + P->replaceAllUsesWith(V); + P->eraseFromParent(); + ++NumPHIsElim; + return true; + } + return false; + } + + if (CastInst *CI = dyn_cast<CastInst>(I)) { + // If the source of the cast is a constant, then this should have + // already been constant folded. The only reason NOT to constant fold + // it is if something (e.g. LSR) was careful to place the constant + // evaluation in a block other than then one that uses it (e.g. to hoist + // the address of globals out of a loop). 
If this is the case, we don't + // want to forward-subst the cast. + if (isa<Constant>(CI->getOperand(0))) + return false; + + if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) + return true; + + if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { + /// Sink a zext or sext into its user blocks if the target type doesn't + /// fit in one register + if (TLI && + TLI->getTypeAction(CI->getContext(), + TLI->getValueType(*DL, CI->getType())) == + TargetLowering::TypeExpandInteger) { + return SinkCast(CI); + } else { + bool MadeChange = moveExtToFormExtLoad(I); + return MadeChange | optimizeExtUses(I); + } + } + return false; + } + + if (CmpInst *CI = dyn_cast<CmpInst>(I)) + if (!TLI || !TLI->hasMultipleConditionRegisters()) + return OptimizeCmpExpression(CI); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + stripInvariantGroupMetadata(*LI); + if (TLI) { + bool Modified = optimizeLoadExt(LI); + unsigned AS = LI->getPointerAddressSpace(); + Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); + return Modified; + } + return false; + } + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + stripInvariantGroupMetadata(*SI); + if (TLI) { + unsigned AS = SI->getPointerAddressSpace(); + return optimizeMemoryInst(I, SI->getOperand(1), + SI->getOperand(0)->getType(), AS); + } + return false; + } + + BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); + + if (BinOp && (BinOp->getOpcode() == Instruction::AShr || + BinOp->getOpcode() == Instruction::LShr)) { + ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); + if (TLI && CI && TLI->hasExtractBitsInsn()) + return OptimizeExtractBits(BinOp, CI, *TLI, *DL); + + return false; + } + + if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) { + if (GEPI->hasAllZeroIndices()) { + /// The GEP operand must be a pointer, so must its result -> BitCast + Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), + GEPI->getName(), GEPI); + GEPI->replaceAllUsesWith(NC); + GEPI->eraseFromParent(); + ++NumGEPsElim; + optimizeInst(NC, ModifiedDT); + return true; + } + return false; + } + + if (CallInst *CI = dyn_cast<CallInst>(I)) + return optimizeCallInst(CI, ModifiedDT); + + if (SelectInst *SI = dyn_cast<SelectInst>(I)) + return optimizeSelectInst(SI); + + if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) + return optimizeShuffleVectorInst(SVI); + + if (auto *Switch = dyn_cast<SwitchInst>(I)) + return optimizeSwitchInst(Switch); + + if (isa<ExtractElementInst>(I)) + return optimizeExtractElementInst(I); + + return false; +} + +// In this pass we look for GEP and cast instructions that are used +// across basic blocks and rewrite them to improve basic-block-at-a-time +// selection. +bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { + SunkAddrs.clear(); + bool MadeChange = false; + + CurInstIterator = BB.begin(); + while (CurInstIterator != BB.end()) { + MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); + if (ModifiedDT) + return true; + } + MadeChange |= dupRetToEnableTailCallOpts(&BB); + + return MadeChange; +} + +// llvm.dbg.value is far away from the value then iSel may not be able +// handle it properly. iSel will drop llvm.dbg.value if it can not +// find a node corresponding to the value. 
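+// For example (hypothetical IR), a call such as
+//   call void @llvm.dbg.value(metadata i32 %a, ...)
+// that sits far from the instruction defining %a is moved to just after
+// that definition, so ISel can still attach the debug value to a node.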
+bool CodeGenPrepare::placeDbgValues(Function &F) { + bool MadeChange = false; + for (BasicBlock &BB : F) { + Instruction *PrevNonDbgInst = nullptr; + for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { + Instruction *Insn = &*BI++; + DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); + // Leave dbg.values that refer to an alloca alone. These + // instrinsics describe the address of a variable (= the alloca) + // being taken. They should not be moved next to the alloca + // (and to the beginning of the scope), but rather stay close to + // where said address is used. + if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { + PrevNonDbgInst = Insn; + continue; + } + + Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); + if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; + DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); + DVI->removeFromParent(); + if (isa<PHINode>(VI)) + DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); + else + DVI->insertAfter(VI); + MadeChange = true; + ++NumDbgValueMoved; + } + } + } + return MadeChange; +} + +// If there is a sequence that branches based on comparing a single bit +// against zero that can be combined into a single instruction, and the +// target supports folding these into a single instruction, sink the +// mask and compare into the branch uses. Do this before OptimizeBlock -> +// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being +// searched for. +bool CodeGenPrepare::sinkAndCmp(Function &F) { + if (!EnableAndCmpSinking) + return false; + if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) + return false; + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = &*I++; + + // Does this BB end with the following? + // %andVal = and %val, #single-bit-set + // %icmpVal = icmp %andResult, 0 + // br i1 %cmpVal label %dest1, label %dest2" + BranchInst *Brcc = dyn_cast<BranchInst>(BB->getTerminator()); + if (!Brcc || !Brcc->isConditional()) + continue; + ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0)); + if (!Cmp || Cmp->getParent() != BB) + continue; + ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1)); + if (!Zero || !Zero->isZero()) + continue; + Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0)); + if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + continue; + ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1)); + if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) + continue; + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + + // Push the "and; icmp" for any users that are conditional branches. + // Since there can only be one branch use per BB, we don't need to keep + // track of which BBs we insert into. + for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast<BranchInst>(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. 
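+      // For example (hypothetical IR), for a pattern such as
+      //   %andVal = and i32 %val, 4
+      //   %cmpVal = icmp eq i32 %andVal, 0
+      // a fresh and+icmp pair is materialized right before each conditional
+      // branch in another block that uses %cmpVal, so the mask, compare, and
+      // branch can be selected as one instruction.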
+ MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +} + +/// \brief Retrieve the probabilities of a conditional branch. Returns true on +/// success, or returns false if no or invalid metadata was found. +static bool extractBranchMetadata(BranchInst *BI, + uint64_t &ProbTrue, uint64_t &ProbFalse) { + assert(BI->isConditional() && + "Looking for probabilities on unconditional branch?"); + auto *ProfileData = BI->getMetadata(LLVMContext::MD_prof); + if (!ProfileData || ProfileData->getNumOperands() != 3) + return false; + + const auto *CITrue = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(1)); + const auto *CIFalse = + mdconst::dyn_extract<ConstantInt>(ProfileData->getOperand(2)); + if (!CITrue || !CIFalse) + return false; + + ProbTrue = CITrue->getValue().getZExtValue(); + ProbFalse = CIFalse->getValue().getZExtValue(); + + return true; +} + +/// \brief Scale down both weights to fit into uint32_t. +static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { + uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; + uint32_t Scale = (NewMax / UINT32_MAX) + 1; + NewTrue = NewTrue / Scale; + NewFalse = NewFalse / Scale; +} + +/// \brief Some targets prefer to split a conditional branch like: +/// \code +/// %0 = icmp ne i32 %a, 0 +/// %1 = icmp ne i32 %b, 0 +/// %or.cond = or i1 %0, %1 +/// br i1 %or.cond, label %TrueBB, label %FalseBB +/// \endcode +/// into multiple branch instructions like: +/// \code +/// bb1: +/// %0 = icmp ne i32 %a, 0 +/// br i1 %0, label %TrueBB, label %bb2 +/// bb2: +/// %1 = icmp ne i32 %b, 0 +/// br i1 %1, label %TrueBB, label %FalseBB +/// \endcode +/// This usually allows instruction selection to do even further optimizations +/// and combine the compare with the branch instruction. Currently this is +/// applied for targets which have "cheap" jump instructions. +/// +/// FIXME: Remove the (equivalent?) implementation in SelectionDAG. +/// +bool CodeGenPrepare::splitBranchCondition(Function &F) { + if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) + return false; + + bool MadeChange = false; + for (auto &BB : F) { + // Does this BB end with the following? + // %cond1 = icmp|fcmp|binary instruction ... + // %cond2 = icmp|fcmp|binary instruction ... + // %cond.or = or|and i1 %cond1, cond2 + // br i1 %cond.or label %dest1, label %dest2" + BinaryOperator *LogicOp; + BasicBlock *TBB, *FBB; + if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) + continue; + + auto *Br1 = cast<BranchInst>(BB.getTerminator()); + if (Br1->getMetadata(LLVMContext::MD_unpredictable)) + continue; + + unsigned Opc; + Value *Cond1, *Cond2; + if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), + m_OneUse(m_Value(Cond2))))) + Opc = Instruction::And; + else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), + m_OneUse(m_Value(Cond2))))) + Opc = Instruction::Or; + else + continue; + + if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || + !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) + continue; + + DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump()); + + // Create a new BB. 
+    auto *InsertBefore = std::next(Function::iterator(BB))
+                          .getNodePtrUnchecked();
+    auto TmpBB = BasicBlock::Create(BB.getContext(),
+                                    BB.getName() + ".cond.split",
+                                    BB.getParent(), InsertBefore);
+
+    // Update the original basic block to use the first condition directly in
+    // its branch instruction and remove the no longer needed and/or
+    // instruction.
+    Br1->setCondition(Cond1);
+    LogicOp->eraseFromParent();
+
+    // Depending on the condition we have to either replace the true or the
+    // false successor of the original branch instruction.
+    if (Opc == Instruction::And)
+      Br1->setSuccessor(0, TmpBB);
+    else
+      Br1->setSuccessor(1, TmpBB);
+
+    // Fill in the new basic block.
+    auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
+    if (auto *I = dyn_cast<Instruction>(Cond2)) {
+      I->removeFromParent();
+      I->insertBefore(Br2);
+    }
+
+    // Update PHI nodes in both successors. The original BB needs to be
+    // replaced in one successor's PHI nodes, because the branch now comes
+    // from the newly generated BB (TmpBB). In the other successor we need to
+    // add one incoming edge to the PHI nodes, because both branch
+    // instructions now target the same successor. Depending on the original
+    // branch condition (and/or) we have to swap the successors (TrueDest,
+    // FalseDest), so that we perform the correct update for the PHI nodes.
+    // This doesn't change the successor order of the just created branch
+    // instruction (or any other instruction).
+    if (Opc == Instruction::Or)
+      std::swap(TBB, FBB);
+
+    // Replace the old BB with the new BB.
+    for (auto &I : *TBB) {
+      PHINode *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+        break;
+      int i;
+      while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
+        PN->setIncomingBlock(i, TmpBB);
+    }
+
+    // Add another incoming edge from the new BB.
+    for (auto &I : *FBB) {
+      PHINode *PN = dyn_cast<PHINode>(&I);
+      if (!PN)
+        break;
+      auto *Val = PN->getIncomingValueForBlock(&BB);
+      PN->addIncoming(Val, TmpBB);
+    }
+
+    // Update the branch weights (from SelectionDAGBuilder::
+    // FindMergedConditions).
+    if (Opc == Instruction::Or) {
+      // Codegen X | Y as:
+      // BB1:
+      //   jmp_if_X TBB
+      //   jmp TmpBB
+      // TmpBB:
+      //   jmp_if_Y TBB
+      //   jmp FBB
+      //
+
+      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+      // The requirement is that
+      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+      //  = TrueProb for original BB.
+      // Assuming the original weights are A and B, one choice is to set BB1's
+      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This
+      // choice assumes that
+      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+      // Another choice is to assume TrueProb for BB1 equals TrueProb for
+      // TmpBB, but the math is more complicated.
+      uint64_t TrueWeight, FalseWeight;
+      if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+        uint64_t NewTrueWeight = TrueWeight;
+        uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+        NewTrueWeight = TrueWeight;
+        NewFalseWeight = 2 * FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+      }
+    } else {
+      // Codegen X & Y as:
+      // BB1:
+      //   jmp_if_X TmpBB
+      //   jmp FBB
+      // TmpBB:
+      //   jmp_if_Y TBB
+      //   jmp FBB
+      //
+      // This requires creation of TmpBB after CurBB.
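+      // As a sanity check of the weight scheme described below (the weights
+      // are hypothetical): with original weights A = 1 and B = 3, BB1 gets
+      // the weights 5 and 3 and TmpBB gets 2 and 3, so
+      //   FalseProb for BB1 + TrueProb for BB1 * FalseProb for TmpBB
+      //     = 3/8 + (5/8) * (3/5) = 3/4,
+      // which matches FalseProb for the original BB.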
+
+      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+      // The requirement is that
+      //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+      //  = FalseProb for original BB.
+      // Assuming the original weights are A and B, one choice is to set
+      // BB1's weights to 2A+B and B, and set TmpBB's weights to 2A and B.
+      // This choice assumes that
+      //   FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
+      uint64_t TrueWeight, FalseWeight;
+      if (extractBranchMetadata(Br1, TrueWeight, FalseWeight)) {
+        uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
+        uint64_t NewFalseWeight = FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+
+        NewTrueWeight = 2 * TrueWeight;
+        NewFalseWeight = FalseWeight;
+        scaleWeights(NewTrueWeight, NewFalseWeight);
+        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
+                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
+      }
+    }
+
+    // Note: No point in getting fancy here, since the DT info is never
+    // available to CodeGenPrepare.
+    ModifiedDT = true;
+
+    MadeChange = true;
+
+    DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
+          TmpBB->dump());
+  }
+  return MadeChange;
+}
+
+void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
+  if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
+    I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
+}
diff --git a/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
new file mode 100644
index 0000000..ff7c0d5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CoreCLRGC.cpp
@@ -0,0 +1,54 @@
+//===-- CoreCLRGC.cpp - CoreCLR Runtime GC Strategy -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a GCStrategy for the CoreCLR Runtime.
+// The strategy is similar to Statepoint-example GC, but differs from it in
+// certain aspects, such as:
+// 1) Base-pointers need not be explicitly tracked and reported for
+//    interior pointers
+// 2) Uses a different format for encoding stack-maps
+// 3) Location of Safe-point polls: polls are only needed before loop-back edges
+//    and before tail-calls (not needed at function-entry)
+//
+// The above differences in behavior are to be implemented in upcoming checkins.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GCStrategy.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+namespace {
+class CoreCLRGC : public GCStrategy {
+public:
+  CoreCLRGC() {
+    UseStatepoints = true;
+    // These options are all gc.root specific, we specify them so that the
+    // gc.root lowering code doesn't run.
+    InitRoots = false;
+    NeededSafePoints = 0;
+    UsesMetadata = false;
+    CustomRoots = false;
+  }
+  Optional<bool> isGCManagedPointer(const Type *Ty) const override {
+    // Method is only valid on pointer typed values.
+    const PointerType *PT = cast<PointerType>(Ty);
+    // We pick addrspace(1) as our GC managed heap.
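+    // For example (hypothetical IR), a value of type i8 addrspace(1)* is
+    // reported as GC-managed here, while a plain i8* in address space 0 is
+    // not.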
+ return (1 == PT->getAddressSpace()); + } +}; +} + +static GCRegistry::Add<CoreCLRGC> X("coreclr", "CoreCLR-compatible GC"); + +namespace llvm { +void linkCoreCLRGC() {} +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp new file mode 100644 index 0000000..c924ba3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -0,0 +1,680 @@ +//===----- CriticalAntiDepBreaker.cpp - Anti-dep breaker -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. +// +//===----------------------------------------------------------------------===// + +#include "CriticalAntiDepBreaker.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "post-RA-sched" + +CriticalAntiDepBreaker::CriticalAntiDepBreaker(MachineFunction &MFi, + const RegisterClassInfo &RCI) + : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getSubtarget().getInstrInfo()), + TRI(MF.getSubtarget().getRegisterInfo()), RegClassInfo(RCI), + Classes(TRI->getNumRegs(), nullptr), KillIndices(TRI->getNumRegs(), 0), + DefIndices(TRI->getNumRegs(), 0), KeepRegs(TRI->getNumRegs(), false) {} + +CriticalAntiDepBreaker::~CriticalAntiDepBreaker() { +} + +void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { + const unsigned BBSize = BB->size(); + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { + // Clear out the register class data. + Classes[i] = nullptr; + + // Initialize the indices to indicate that no registers are live. + KillIndices[i] = ~0u; + DefIndices[i] = BBSize; + } + + // Clear "do not change" set. + KeepRegs.reset(); + + bool IsReturnBlock = BB->isReturnBlock(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (const auto &LI : (*SI)->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; + } + } + + // Mark live-out callee-saved registers. In a return block this is + // all callee-saved registers. In non-return this is any + // callee-saved register that is not saved in the prolog. 
+ const MachineFrameInfo *MFI = MF.getFrameInfo(); + BitVector Pristine = MFI->getPristineRegs(MF); + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) { + if (!IsReturnBlock && !Pristine.test(*I)) continue; + for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { + unsigned Reg = *AI; + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = BBSize; + DefIndices[Reg] = ~0u; + } + } +} + +void CriticalAntiDepBreaker::FinishBlock() { + RegRefs.clear(); + KeepRegs.reset(); +} + +void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) { + // Kill instructions can define registers but are really nops, and there might + // be a real definition earlier that needs to be paired with uses dominated by + // this kill. + + // FIXME: It may be possible to remove the isKill() restriction once PR18663 + // has been properly fixed. There can be value in processing kills as seen in + // the AggressiveAntiDepBreaker class. + if (MI->isDebugValue() || MI->isKill()) + return; + assert(Count < InsertPosIndex && "Instruction index out of expected range!"); + + for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] != ~0u) { + // If Reg is currently live, then mark that it can't be renamed as + // we don't know the extent of its live-range anymore (now that it + // has been scheduled). + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + KillIndices[Reg] = Count; + } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) { + // Any register which was defined within the previous scheduling region + // may have been rescheduled and its lifetime may overlap with registers + // in ways not reflected in our current liveness state. For each such + // register, adjust the liveness state to be conservatively correct. + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Move the def index to the end of the previous region, to reflect + // that the def could theoretically have been scheduled at the end. + DefIndices[Reg] = InsertPosIndex; + } + } + + PrescanInstruction(MI); + ScanInstruction(MI, Count); +} + +/// CriticalPathStep - Return the next SUnit after SU on the bottom-up +/// critical path. +static const SDep *CriticalPathStep(const SUnit *SU) { + const SDep *Next = nullptr; + unsigned NextDepth = 0; + // Find the predecessor edge with the greatest depth. + for (SUnit::const_pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); + P != PE; ++P) { + const SUnit *PredSU = P->getSUnit(); + unsigned PredLatency = P->getLatency(); + unsigned PredTotalLatency = PredSU->getDepth() + PredLatency; + // In the case of a latency tie, prefer an anti-dependency edge over + // other types of edges. + if (NextDepth < PredTotalLatency || + (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) { + NextDepth = PredTotalLatency; + Next = &*P; + } + } + return Next; +} + +void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // It's not safe to change register allocation for source operands of + // instructions that have special allocation requirements. Also assume all + // registers used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservative here because the kill markers cannot be trusted after + // if-conversion: + // %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... 
+ // STR %R0, %R6<kill>, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6<def> = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6<kill>, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->isCall() || + MI->hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + + // Scan the register operands for this instruction and update + // Classes and RegRefs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + const TargetRegisterClass *NewRC = nullptr; + + if (i < MI->getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + // Now check for aliases. + for (MCRegAliasIterator AI(Reg, TRI, false); AI.isValid(); ++AI) { + // If an alias of the reg is used during the live range, give up. + // Note that this allows us to skip checking if AntiDepReg + // overlaps with any of the aliases, among other things. + unsigned AliasReg = *AI; + if (Classes[AliasReg]) { + Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1); + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + + // If we're still willing to consider this register, note the reference. + if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) + RegRefs.insert(std::make_pair(Reg, &MO)); + + // If this reg is tied and live (Classes[Reg] is set to -1), we can't change + // it or any of its sub or super regs. We need to use KeepRegs to mark the + // reg because not all uses of the same reg within an instruction are + // necessarily tagged as tied. + // Example: an x86 "xor %eax, %eax" will have one source operand tied to the + // def register but not the second (see PR20020 for details). + // FIXME: can this check be relaxed to account for undef uses + // of a register? In the above 'xor' example, the uses of %eax are undef, so + // earlier instructions could still replace %eax even though the 'xor' + // itself can't be changed. + if (MI->isRegTiedToUseOperand(i) && + Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + KeepRegs.set(*SubRegs); + } + for (MCSuperRegIterator SuperRegs(Reg, TRI); + SuperRegs.isValid(); ++SuperRegs) { + KeepRegs.set(*SuperRegs); + } + } + + if (MO.isUse() && Special) { + if (!KeepRegs.test(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + KeepRegs.set(*SubRegs); + } + } + } +} + +void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, + unsigned Count) { + // Update liveness. + // Proceeding upwards, registers that are defed but not used in this + // instruction are now dead. + assert(!MI->isKill() && "Attempting to scan a kill instruction"); + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + + if (MO.isRegMask()) + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (MO.clobbersPhysReg(i)) { + DefIndices[i] = Count; + KillIndices[i] = ~0u; + KeepRegs.reset(i); + Classes[i] = nullptr; + RegRefs.erase(i); + } + + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + + // If we've already marked this reg as unchangeable, carry on. + if (KeepRegs.test(Reg)) continue; + + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // For the reg itself and all subregs: update the def to current; + // reset the kill state, any restrictions, and references. + for (MCSubRegIterator SRI(Reg, TRI, true); SRI.isValid(); ++SRI) { + unsigned SubregReg = *SRI; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.reset(SubregReg); + Classes[SubregReg] = nullptr; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + Classes[*SR] = reinterpret_cast<TargetRegisterClass *>(-1); + } + } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isUse()) continue; + + const TargetRegisterClass *NewRC = nullptr; + if (i < MI->getDesc().getNumOperands()) + NewRC = TII->getRegClass(MI->getDesc(), i, TRI, MF); + + // For now, only allow the register to be changed if its register + // class is consistent across all uses. + if (!Classes[Reg] && NewRC) + Classes[Reg] = NewRC; + else if (!NewRC || Classes[Reg] != NewRC) + Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1); + + RegRefs.insert(std::make_pair(Reg, &MO)); + + // It wasn't previously live but now it is, this is a kill. + // Repeat for all aliases. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (KillIndices[AliasReg] == ~0u) { + KillIndices[AliasReg] = Count; + DefIndices[AliasReg] = ~0u; + } + } + } +} + +// Check all machine operands that reference the antidependent register and must +// be replaced by NewReg. Return true if any of their parent instructions may +// clobber the new register. +// +// Note: AntiDepReg may be referenced by a two-address instruction such that +// it's use operand is tied to a def operand. We guard against the case in which +// the two-address instruction also defines NewReg, as may happen with +// pre/postincrement loads. In this case, both the use and def operands are in +// RegRefs because the def is inserted by PrescanInstruction and not erased +// during ScanInstruction. So checking for an instruction with definitions of +// both NewReg and AntiDepReg covers it. +bool +CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg) +{ + for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) { + MachineOperand *RefOper = I->second; + + // Don't allow the instruction defining AntiDepReg to earlyclobber its + // operands, in case they may be assigned to NewReg. In this case antidep + // breaking must fail, but it's too rare to bother optimizing. + if (RefOper->isDef() && RefOper->isEarlyClobber()) + return true; + + // Handle cases in which this instruction defines NewReg. 
+ MachineInstr *MI = RefOper->getParent(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &CheckOper = MI->getOperand(i); + + if (CheckOper.isRegMask() && CheckOper.clobbersPhysReg(NewReg)) + return true; + + if (!CheckOper.isReg() || !CheckOper.isDef() || + CheckOper.getReg() != NewReg) + continue; + + // Don't allow the instruction to define NewReg and AntiDepReg. + // When AntiDepReg is renamed it will be an illegal op. + if (RefOper->isDef()) + return true; + + // Don't allow an instruction using AntiDepReg to be earlyclobbered by + // NewReg. + if (CheckOper.isEarlyClobber()) + return true; + + // Don't allow inline asm to define NewReg at all. Who knows what it's + // doing with it. + if (MI->isInlineAsm()) + return true; + } + } + return false; +} + +unsigned CriticalAntiDepBreaker:: +findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &Forbid) +{ + ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(RC); + for (unsigned i = 0; i != Order.size(); ++i) { + unsigned NewReg = Order[i]; + // Don't replace a register with itself. + if (NewReg == AntiDepReg) continue; + // Don't replace a register with one that was recently used to repair + // an anti-dependence with this AntiDepReg, because that would + // re-introduce that anti-dependence. + if (NewReg == LastNewReg) continue; + // If any instructions that define AntiDepReg also define the NewReg, it's + // not suitable. For example, Instruction with multiple definitions can + // result in this condition. + if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue; + // If NewReg is dead and NewReg's most recent def is not before + // AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg. + assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u)) + && "Kill and Def maps aren't consistent for AntiDepReg!"); + assert(((KillIndices[NewReg] == ~0u) != (DefIndices[NewReg] == ~0u)) + && "Kill and Def maps aren't consistent for NewReg!"); + if (KillIndices[NewReg] != ~0u || + Classes[NewReg] == reinterpret_cast<TargetRegisterClass *>(-1) || + KillIndices[AntiDepReg] > DefIndices[NewReg]) + continue; + // If NewReg overlaps any of the forbidden registers, we can't use it. + bool Forbidden = false; + for (SmallVectorImpl<unsigned>::iterator it = Forbid.begin(), + ite = Forbid.end(); it != ite; ++it) + if (TRI->regsOverlap(NewReg, *it)) { + Forbidden = true; + break; + } + if (Forbidden) continue; + return NewReg; + } + + // No registers are free and available! + return 0; +} + +unsigned CriticalAntiDepBreaker:: +BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) { + // The code below assumes that there is at least one instruction, + // so just duck out immediately if the block is empty. + if (SUnits.empty()) return 0; + + // Keep a map of the MachineInstr*'s back to the SUnit representing them. + // This is used for updating debug information. + // + // FIXME: Replace this with the existing map in ScheduleDAGInstrs::MISUnitMap + DenseMap<MachineInstr*,const SUnit*> MISUnitMap; + + // Find the node at the bottom of the critical path. 
+ const SUnit *Max = nullptr; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; + if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) + Max = SU; + } + +#ifndef NDEBUG + { + DEBUG(dbgs() << "Critical path has total latency " + << (Max->getDepth() + Max->Latency) << "\n"); + DEBUG(dbgs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (KillIndices[Reg] == ~0u) + DEBUG(dbgs() << " " << TRI->getName(Reg)); + } + DEBUG(dbgs() << '\n'); + } +#endif + + // Track progress along the critical path through the SUnit graph as we walk + // the instructions. + const SUnit *CriticalPathSU = Max; + MachineInstr *CriticalPathMI = CriticalPathSU->getInstr(); + + // Consider this pattern: + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // A = ... + // ... = A + // There are three anti-dependencies here, and without special care, + // we'd break all of them using the same register: + // A = ... + // ... = A + // B = ... + // ... = B + // B = ... + // ... = B + // B = ... + // ... = B + // because at each anti-dependence, B is the first register that + // isn't A which is free. This re-introduces anti-dependencies + // at all but one of the original anti-dependencies that we were + // trying to break. To avoid this, keep track of the most recent + // register that each register was replaced with, avoid + // using it to repair an anti-dependence on the same register. + // This lets us produce this: + // A = ... + // ... = A + // B = ... + // ... = B + // C = ... + // ... = C + // B = ... + // ... = B + // This still has an anti-dependence on B, but at least it isn't on the + // original critical path. + // + // TODO: If we tracked more than one register here, we could potentially + // fix that remaining critical edge too. This is a little more involved, + // because unlike the most recent register, less recent registers should + // still be considered, though only if no other registers are available. + std::vector<unsigned> LastNewReg(TRI->getNumRegs(), 0); + + // Attempt to break anti-dependence edges on the critical path. Walk the + // instructions from the bottom up, tracking information about liveness + // as we go to help determine which registers are available. + unsigned Broken = 0; + unsigned Count = InsertPosIndex - 1; + for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { + MachineInstr *MI = --I; + // Kill instructions can define registers but are really nops, and there + // might be a real definition earlier that needs to be paired with uses + // dominated by this kill. + + // FIXME: It may be possible to remove the isKill() restriction once PR18663 + // has been properly fixed. There can be value in processing kills as seen + // in the AggressiveAntiDepBreaker class. + if (MI->isDebugValue() || MI->isKill()) + continue; + + // Check if this instruction has a dependence on the critical path that + // is an anti-dependence that we may be able to break. If it is, set + // AntiDepReg to the non-zero register associated with the anti-dependence. + // + // We limit our attention to the critical path as a heuristic to avoid + // breaking anti-dependence edges that aren't going to significantly + // impact the overall schedule. There are a limited number of registers + // and we want to save them for the important edges. + // + // TODO: Instructions with multiple defs could have multiple + // anti-dependencies. 
The current code here only knows how to break one + // edge per instruction. Note that we'd have to be able to break all of + // the anti-dependencies in an instruction in order to be effective. + unsigned AntiDepReg = 0; + if (MI == CriticalPathMI) { + if (const SDep *Edge = CriticalPathStep(CriticalPathSU)) { + const SUnit *NextSU = Edge->getSUnit(); + + // Only consider anti-dependence edges. + if (Edge->getKind() == SDep::Anti) { + AntiDepReg = Edge->getReg(); + assert(AntiDepReg != 0 && "Anti-dependence on reg0?"); + if (!MRI.isAllocatable(AntiDepReg)) + // Don't break anti-dependencies on non-allocatable registers. + AntiDepReg = 0; + else if (KeepRegs.test(AntiDepReg)) + // Don't break anti-dependencies if a use down below requires + // this exact register. + AntiDepReg = 0; + else { + // If the SUnit has other dependencies on the SUnit that it + // anti-depends on, don't bother breaking the anti-dependency + // since those edges would prevent such units from being + // scheduled past each other regardless. + // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. + for (SUnit::const_pred_iterator P = CriticalPathSU->Preds.begin(), + PE = CriticalPathSU->Preds.end(); P != PE; ++P) + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + } + CriticalPathSU = NextSU; + CriticalPathMI = CriticalPathSU->getInstr(); + } else { + // We've reached the end of the critical path. + CriticalPathSU = nullptr; + CriticalPathMI = nullptr; + } + } + + PrescanInstruction(MI); + + SmallVector<unsigned, 2> ForbidRegs; + + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) + // If this instruction's defs have special allocation requirement, don't + // break this anti-dependency. + AntiDepReg = 0; + else if (AntiDepReg) { + // If this instruction has a use of AntiDepReg, breaking it + // is invalid. If the instruction defines other registers, + // save a list of them so that we don't pick a new register + // that overlaps any of them. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { + AntiDepReg = 0; + break; + } + if (MO.isDef() && Reg != AntiDepReg) + ForbidRegs.push_back(Reg); + } + } + + // Determine AntiDepReg's register class, if it is live and is + // consistently used within a single class. + const TargetRegisterClass *RC = AntiDepReg != 0 ? Classes[AntiDepReg] + : nullptr; + assert((AntiDepReg == 0 || RC != nullptr) && + "Register should be live if it's causing an anti-dependence!"); + if (RC == reinterpret_cast<TargetRegisterClass *>(-1)) + AntiDepReg = 0; + + // Look for a suitable register to use to break the anti-dependence. + // + // TODO: Instead of picking the first free register, consider which might + // be the best. 
+ if (AntiDepReg != 0) { + std::pair<std::multimap<unsigned, MachineOperand *>::iterator, + std::multimap<unsigned, MachineOperand *>::iterator> + Range = RegRefs.equal_range(AntiDepReg); + if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, + AntiDepReg, + LastNewReg[AntiDepReg], + RC, ForbidRegs)) { + DEBUG(dbgs() << "Breaking anti-dependence edge on " + << TRI->getName(AntiDepReg) + << " with " << RegRefs.count(AntiDepReg) << " references" + << " using " << TRI->getName(NewReg) << "!\n"); + + // Update the references to the old register to refer to the new + // register. + for (std::multimap<unsigned, MachineOperand *>::iterator + Q = Range.first, QE = Range.second; Q != QE; ++Q) { + Q->second->setReg(NewReg); + // If the SU for the instruction being updated has debug information + // related to the anti-dependency register, make sure to update that + // as well. + const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (DbgValueVector::iterator DVI = DbgValues.begin(), + DVE = DbgValues.end(); DVI != DVE; ++DVI) + if (DVI->second == Q->second->getParent()) + UpdateDbgValue(DVI->first, AntiDepReg, NewReg); + } + + // We just went back in time and modified history; the + // liveness information for the anti-dependence reg is now + // inconsistent. Set the state as if it were dead. + Classes[NewReg] = Classes[AntiDepReg]; + DefIndices[NewReg] = DefIndices[AntiDepReg]; + KillIndices[NewReg] = KillIndices[AntiDepReg]; + assert(((KillIndices[NewReg] == ~0u) != + (DefIndices[NewReg] == ~0u)) && + "Kill and Def maps aren't consistent for NewReg!"); + + Classes[AntiDepReg] = nullptr; + DefIndices[AntiDepReg] = KillIndices[AntiDepReg]; + KillIndices[AntiDepReg] = ~0u; + assert(((KillIndices[AntiDepReg] == ~0u) != + (DefIndices[AntiDepReg] == ~0u)) && + "Kill and Def maps aren't consistent for AntiDepReg!"); + + RegRefs.erase(AntiDepReg); + LastNewReg[AntiDepReg] = NewReg; + ++Broken; + } + } + + ScanInstruction(MI, Count); + } + + return Broken; +} diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h new file mode 100644 index 0000000..10b8739 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -0,0 +1,108 @@ +//=- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CriticalAntiDepBreaker class, which +// implements register anti-dependence breaking along a blocks +// critical path during post-RA scheduler. 
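+// For example (register names are illustrative), in
+//   r1 = r2 + r3
+//   r2 = r4 + r5
+// the second instruction cannot be moved above the first because it
+// redefines r2 (a write-after-read, i.e. anti-dependence). Renaming the
+// second definition to a free register removes that edge and gives the
+// post-RA scheduler more freedom to reorder.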
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H +#define LLVM_LIB_CODEGEN_CRITICALANTIDEPBREAKER_H + +#include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include <map> + +namespace llvm { +class RegisterClassInfo; +class TargetInstrInfo; +class TargetRegisterInfo; + +class LLVM_LIBRARY_VISIBILITY CriticalAntiDepBreaker : public AntiDepBreaker { + MachineFunction& MF; + MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const RegisterClassInfo &RegClassInfo; + + /// The set of allocatable registers. + /// We'll be ignoring anti-dependencies on non-allocatable registers, + /// because they may not be safe to break. + const BitVector AllocatableSet; + + /// For live regs that are only used in one register class in a + /// live range, the register class. If the register is not live, the + /// corresponding value is null. If the register is live but used in + /// multiple register classes, the corresponding value is -1 casted to a + /// pointer. + std::vector<const TargetRegisterClass*> Classes; + + /// Map registers to all their references within a live range. + std::multimap<unsigned, MachineOperand *> RegRefs; + typedef std::multimap<unsigned, MachineOperand *>::const_iterator + RegRefIter; + + /// The index of the most recent kill (proceeding bottom-up), + /// or ~0u if the register is not live. + std::vector<unsigned> KillIndices; + + /// The index of the most recent complete def (proceeding + /// bottom up), or ~0u if the register is live. + std::vector<unsigned> DefIndices; + + /// A set of registers which are live and cannot be changed to + /// break anti-dependencies. + BitVector KeepRegs; + + public: + CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&); + ~CriticalAntiDepBreaker() override; + + /// Initialize anti-dep breaking for a new basic block. + void StartBlock(MachineBasicBlock *BB) override; + + /// Identifiy anti-dependencies along the critical path + /// of the ScheduleDAG and break them by renaming registers. + unsigned BreakAntiDependencies(const std::vector<SUnit>& SUnits, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned InsertPosIndex, + DbgValueVector &DbgValues) override; + + /// Update liveness information to account for the current + /// instruction, which will not be scheduled. + void Observe(MachineInstr *MI, unsigned Count, + unsigned InsertPosIndex) override; + + /// Finish anti-dep breaking for a basic block. 
+ void FinishBlock() override; + + private: + void PrescanInstruction(MachineInstr *MI); + void ScanInstruction(MachineInstr *MI, unsigned Count); + bool isNewRegClobberedByRefs(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned NewReg); + unsigned findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVectorImpl<unsigned> &Forbid); + }; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp new file mode 100644 index 0000000..af6b6a3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -0,0 +1,274 @@ +//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This class implements a deterministic finite automaton (DFA) based +// packetizing mechanism for VLIW architectures. It provides APIs to +// determine whether there exists a legal mapping of instructions to +// functional unit assignments in a packet. The DFA is auto-generated from +// the target's Schedule.td file. +// +// A DFA consists of 3 major elements: states, inputs, and transitions. For +// the packetizing mechanism, the input is the set of instruction classes for +// a target. The state models all possible combinations of functional unit +// consumption for a given set of instructions in a packet. A transition +// models the addition of an instruction to a packet. In the DFA constructed +// by this class, if an instruction can be added to a packet, then a valid +// transition exists from the corresponding state. Invalid transitions +// indicate that the instruction cannot be added to the current packet. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/DFAPacketizer.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +// -------------------------------------------------------------------- +// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp + +namespace { + DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) { + return (Inp << DFA_MAX_RESOURCES) | FuncUnits; + } + + /// Return the DFAInput for an instruction class input vector. + /// This function is used in both DFAPacketizer.cpp and in + /// DFAPacketizerEmitter.cpp. + DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) { + DFAInput InsnInput = 0; + assert ((InsnClass.size() <= DFA_MAX_RESTERMS) && + "Exceeded maximum number of DFA terms"); + for (auto U : InsnClass) + InsnInput = addDFAFuncUnits(InsnInput, U); + return InsnInput; + } +} +// -------------------------------------------------------------------- + +DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, + const DFAStateInput (*SIT)[2], + const unsigned *SET): + InstrItins(I), CurrentState(0), DFAStateInputTable(SIT), + DFAStateEntryTable(SET) { + // Make sure DFA types are large enough for the number of terms & resources. 
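+  // For illustration (the constants' values are hypothetical): each
+  // instruction-class term occupies a DFA_MAX_RESOURCES-bit field of the
+  // DFAInput, so with DFA_MAX_RESOURCES == 4 an instruction class that uses
+  // functional units 0b0011 and then 0b0100 would be encoded as
+  //   (0b0011 << 4) | 0b0100 == 0b00110100.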
+ assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput"); + assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)) + && "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput"); +} + + +// +// ReadTable - Read the DFA transition table and update CachedTable. +// +// Format of the transition tables: +// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid +// transitions +// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable +// for the ith state +// +void DFAPacketizer::ReadTable(unsigned int state) { + unsigned ThisState = DFAStateEntryTable[state]; + unsigned NextStateInTable = DFAStateEntryTable[state+1]; + // Early exit in case CachedTable has already contains this + // state's transitions. + if (CachedTable.count(UnsignPair(state, + DFAStateInputTable[ThisState][0]))) + return; + + for (unsigned i = ThisState; i < NextStateInTable; i++) + CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] = + DFAStateInputTable[i][1]; +} + +// +// getInsnInput - Return the DFAInput for an instruction class. +// +DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) { + // Note: this logic must match that in DFAPacketizerDefs.h for input vectors. + DFAInput InsnInput = 0; + unsigned i = 0; + for (const InstrStage *IS = InstrItins->beginStage(InsnClass), + *IE = InstrItins->endStage(InsnClass); IS != IE; ++IS, ++i) { + InsnInput = addDFAFuncUnits(InsnInput, IS->getUnits()); + assert ((i < DFA_MAX_RESTERMS) && "Exceeded maximum number of DFA inputs"); + } + return InsnInput; +} + +// getInsnInput - Return the DFAInput for an instruction class input vector. +DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) { + return getDFAInsnInput(InsnClass); +} + +// canReserveResources - Check if the resources occupied by a MCInstrDesc +// are available in the current state. +bool DFAPacketizer::canReserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); + ReadTable(CurrentState); + return (CachedTable.count(StateTrans) != 0); +} + +// reserveResources - Reserve the resources occupied by a MCInstrDesc and +// change the current state to reflect that change. +void DFAPacketizer::reserveResources(const llvm::MCInstrDesc *MID) { + unsigned InsnClass = MID->getSchedClass(); + DFAInput InsnInput = getInsnInput(InsnClass); + UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput); + ReadTable(CurrentState); + assert(CachedTable.count(StateTrans) != 0); + CurrentState = CachedTable[StateTrans]; +} + + +// canReserveResources - Check if the resources occupied by a machine +// instruction are available in the current state. +bool DFAPacketizer::canReserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + return canReserveResources(&MID); +} + +// reserveResources - Reserve the resources occupied by a machine +// instruction and change the current state to reflect that change. +void DFAPacketizer::reserveResources(llvm::MachineInstr *MI) { + const llvm::MCInstrDesc &MID = MI->getDesc(); + reserveResources(&MID); +} + +namespace llvm { +// DefaultVLIWScheduler - This class extends ScheduleDAGInstrs and overrides +// Schedule method to build the dependence graph. 
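// Illustrative sketch, not taken from the sources in this change: a minimal
// model of the two-table layout documented for ReadTable() above. StateInput
// holds <Input, NextState> pairs for every valid transition, and StateEntry[S]
// is the index of the first pair belonging to state S, with one extra
// sentinel entry so StateEntry[S + 1] bounds the range. The table contents
// below are made up purely for illustration.
#include <cstdio>
#include <utility>

static const std::pair<unsigned, unsigned> StateInput[] = {
    {0x1, 1}, {0x2, 2},   // transitions out of state 0
    {0x2, 3},             // transitions out of state 1
    {0x1, 3},             // transitions out of state 2
                          // state 3 has no outgoing transitions
};
static const unsigned StateEntry[] = {0, 2, 3, 4, 4};

// Return the successor state, or -1 if the input (a functional-unit request)
// cannot be accepted from this state.
int nextState(unsigned State, unsigned Input) {
  for (unsigned i = StateEntry[State], e = StateEntry[State + 1]; i != e; ++i)
    if (StateInput[i].first == Input)
      return static_cast<int>(StateInput[i].second);
  return -1;
}

int main() {
  std::printf("0 --0x1--> %d\n", nextState(0, 0x1));   // prints 1
  std::printf("1 --0x1--> %d\n", nextState(1, 0x1));   // prints -1: no room
  return 0;
}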
+class DefaultVLIWScheduler : public ScheduleDAGInstrs { +private: + AliasAnalysis *AA; +public: + DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI, + AliasAnalysis *AA); + // Schedule - Actual scheduling work. + void schedule() override; +}; +} + +DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF, + MachineLoopInfo &MLI, + AliasAnalysis *AA) + : ScheduleDAGInstrs(MF, &MLI), AA(AA) { + CanHandleTerminators = true; +} + +void DefaultVLIWScheduler::schedule() { + // Build the scheduling graph. + buildSchedGraph(AA); +} + +// VLIWPacketizerList Ctor +VLIWPacketizerList::VLIWPacketizerList(MachineFunction &MF, + MachineLoopInfo &MLI, AliasAnalysis *AA) + : MF(MF), AA(AA) { + TII = MF.getSubtarget().getInstrInfo(); + ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget()); + VLIWScheduler = new DefaultVLIWScheduler(MF, MLI, AA); +} + +// VLIWPacketizerList Dtor +VLIWPacketizerList::~VLIWPacketizerList() { + if (VLIWScheduler) + delete VLIWScheduler; + + if (ResourceTracker) + delete ResourceTracker; +} + +// endPacket - End the current packet, bundle packet instructions and reset +// DFA state. +void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB, + MachineInstr *MI) { + if (CurrentPacketMIs.size() > 1) { + MachineInstr *MIFirst = CurrentPacketMIs.front(); + finalizeBundle(*MBB, MIFirst->getIterator(), MI->getIterator()); + } + CurrentPacketMIs.clear(); + ResourceTracker->clearResources(); +} + +// PacketizeMIs - Bundle machine instructions into packets. +void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, + MachineBasicBlock::iterator BeginItr, + MachineBasicBlock::iterator EndItr) { + assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); + VLIWScheduler->startBlock(MBB); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, + std::distance(BeginItr, EndItr)); + VLIWScheduler->schedule(); + + // Generate MI -> SU map. + MIToSUnit.clear(); + for (unsigned i = 0, e = VLIWScheduler->SUnits.size(); i != e; ++i) { + SUnit *SU = &VLIWScheduler->SUnits[i]; + MIToSUnit[SU->getInstr()] = SU; + } + + // The main packetizer loop. + for (; BeginItr != EndItr; ++BeginItr) { + MachineInstr *MI = BeginItr; + + this->initPacketizerState(); + + // End the current packet if needed. + if (this->isSoloInstruction(MI)) { + endPacket(MBB, MI); + continue; + } + + // Ignore pseudo instructions. + if (this->ignorePseudoInstruction(MI, MBB)) + continue; + + SUnit *SUI = MIToSUnit[MI]; + assert(SUI && "Missing SUnit Info!"); + + // Ask DFA if machine resource is available for MI. + bool ResourceAvail = ResourceTracker->canReserveResources(MI); + if (ResourceAvail && shouldAddToPacket(MI)) { + // Dependency check for MI with instructions in CurrentPacketMIs. + for (std::vector<MachineInstr*>::iterator VI = CurrentPacketMIs.begin(), + VE = CurrentPacketMIs.end(); VI != VE; ++VI) { + MachineInstr *MJ = *VI; + SUnit *SUJ = MIToSUnit[MJ]; + assert(SUJ && "Missing SUnit Info!"); + + // Is it legal to packetize SUI and SUJ together. + if (!this->isLegalToPacketizeTogether(SUI, SUJ)) { + // Allow packetization if dependency can be pruned. + if (!this->isLegalToPruneDependencies(SUI, SUJ)) { + // End the packet if dependency cannot be pruned. + endPacket(MBB, MI); + break; + } // !isLegalToPruneDependencies. + } // !isLegalToPacketizeTogether. + } // For all instructions in CurrentPacketMIs. + } else { + // End the packet if resource is not available, or if the instruction + // shoud not be added to the current packet. 
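// Illustrative sketch, not taken from the sources in this change: the
// surrounding packetizer loop in miniature. Instructions are taken in order,
// added to the open packet while the resource model accepts them, and the
// packet is closed as soon as one no longer fits. A single bitmask of busy
// functional units stands in for the DFA-based ResourceTracker; ToyInsn and
// the unit masks are invented for the example.
#include <cstdio>
#include <vector>

struct ToyInsn {
  const char *Name;
  unsigned Units;       // functional units this instruction would occupy
};

void packetize(const std::vector<ToyInsn> &Insns) {
  unsigned Busy = 0;    // units already claimed by the current packet
  std::printf("{ ");
  for (const ToyInsn &I : Insns) {
    if (Busy & I.Units) {               // resource conflict: close the packet
      std::printf("}\n{ ");
      Busy = 0;
    }
    Busy |= I.Units;                    // reserve the units for I
    std::printf("%s ", I.Name);
  }
  std::printf("}\n");
}

int main() {
  // Two issue slots, 0x1 and 0x2: ld and add dual-issue, add2 conflicts with
  // add, so a second packet is started.
  packetize({{"ld", 0x1}, {"add", 0x2}, {"add2", 0x2}, {"st", 0x1}});
  // prints: { ld add }
  //         { add2 st }
  return 0;
}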
+ endPacket(MBB, MI); + } + + // Add MI to the current packet. + BeginItr = this->addToPacket(MI); + } // For all instructions in BB. + + // End any packet left behind. + endPacket(MBB, EndItr); + VLIWScheduler->exitRegion(); + VLIWScheduler->finishBlock(); +} diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp new file mode 100644 index 0000000..b11b497 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -0,0 +1,176 @@ +//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level dead-code-elimination pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "codegen-dce" + +STATISTIC(NumDeletes, "Number of dead instructions deleted"); + +namespace { + class DeadMachineInstructionElim : public MachineFunctionPass { + bool runOnMachineFunction(MachineFunction &MF) override; + + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + BitVector LivePhysRegs; + + public: + static char ID; // Pass identification, replacement for typeid + DeadMachineInstructionElim() : MachineFunctionPass(ID) { + initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry()); + } + + private: + bool isDead(const MachineInstr *MI) const; + }; +} +char DeadMachineInstructionElim::ID = 0; +char &llvm::DeadMachineInstructionElimID = DeadMachineInstructionElim::ID; + +INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination", + "Remove dead machine instructions", false, false) + +bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const { + // Technically speaking inline asm without side effects and no defs can still + // be deleted. But there is so much bad inline asm code out there, we should + // let them be. + if (MI->isInlineAsm()) + return false; + + // Don't delete frame allocation labels. + if (MI->getOpcode() == TargetOpcode::LOCAL_ESCAPE) + return false; + + // Don't delete instructions with side effects. + bool SawStore = false; + if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) + return false; + + // Examine each operand. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Don't delete live physreg defs, or any reserved register defs. + if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg)) + return false; + } else { + if (!MRI->use_nodbg_empty(Reg)) + // This def has a non-debug use. Don't delete the instruction! + return false; + } + } + } + + // If there are no defs with uses, the instruction is dead. 
+ return true; +} + +bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + bool AnyChanges = false; + MRI = &MF.getRegInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + // Loop over all instructions in all blocks, from bottom to top, so that it's + // more likely that chains of dependent but ultimately dead instructions will + // be cleaned up. + for (MachineBasicBlock &MBB : make_range(MF.rbegin(), MF.rend())) { + // Start out assuming that reserved registers are live out of this block. + LivePhysRegs = MRI->getReservedRegs(); + + // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not + // live across blocks, but some targets (x86) can have flags live out of a + // block. + for (MachineBasicBlock::succ_iterator S = MBB.succ_begin(), + E = MBB.succ_end(); S != E; S++) + for (const auto &LI : (*S)->liveins()) + LivePhysRegs.set(LI.PhysReg); + + // Now scan the instructions and delete dead ones, tracking physreg + // liveness as we go. + for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), + MIE = MBB.rend(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // If the instruction is dead, delete it! + if (isDead(MI)) { + DEBUG(dbgs() << "DeadMachineInstructionElim: DELETING: " << *MI); + // It is possible that some DBG_VALUE instructions refer to this + // instruction. They get marked as undef and will be deleted + // in the live debug variable analysis. + MI->eraseFromParentAndMarkDBGValuesForRemoval(); + AnyChanges = true; + ++NumDeletes; + MIE = MBB.rend(); + // MII is now pointing to the next instruction to process, + // so don't increment it. + continue; + } + + // Record the physreg defs. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Check the subreg set, not the alias set, because a def + // of a super-register may still be partially live after + // this def. + for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true); + SR.isValid(); ++SR) + LivePhysRegs.reset(*SR); + } + } else if (MO.isRegMask()) { + // Register mask of preserved registers. All clobbers are dead. + LivePhysRegs.clearBitsNotInMask(MO.getRegMask()); + } + } + // Record the physreg uses, after the defs, in case a physreg is + // both defined and used in the same instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + LivePhysRegs.set(*AI); + } + } + } + + // We didn't delete the current instruction, so increment MII to + // the next one. + ++MII; + } + } + + LivePhysRegs.clear(); + return AnyChanges; +} diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp new file mode 100644 index 0000000..eae78a9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -0,0 +1,264 @@ +//===-- DwarfEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
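// Illustrative sketch, not taken from the sources in this change: a toy
// version of the bottom-up scan used by DeadMachineInstructionElim above.
// Walk the block from the last instruction to the first, keep the set of
// values that are still needed below the current point, and drop any
// side-effect-free instruction whose results are not in that set. The Instr
// struct and the register numbers are invented for the example.
#include <cstdio>
#include <set>
#include <vector>

struct Instr {
  const char *Name;
  std::vector<int> Defs, Uses;
  bool HasSideEffects;
};

// Returns the surviving instructions, in their original order.
std::vector<Instr> eliminateDead(std::vector<Instr> Block,
                                 const std::set<int> &LiveOut) {
  std::set<int> Live = LiveOut;        // values needed below this point
  std::vector<Instr> Kept;
  for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
    bool AnyDefLive = false;
    for (int D : I->Defs)
      AnyDefLive |= Live.count(D) != 0;
    if (!AnyDefLive && !I->HasSideEffects)
      continue;                        // dead: nothing below reads its defs
    for (int D : I->Defs)
      Live.erase(D);                   // its defs are produced right here
    for (int U : I->Uses)
      Live.insert(U);                  // its uses must be produced above
    Kept.push_back(*I);
  }
  return {Kept.rbegin(), Kept.rend()};
}

int main() {
  // Value 2 is never read below its definition, so "mul" is deleted; "add"
  // feeds the store and survives.
  std::vector<Instr> Block = {{"add", {1}, {}, false},
                              {"mul", {2}, {1}, false},
                              {"store", {}, {1}, true}};
  for (const Instr &I : eliminateDead(Block, {}))
    std::printf("%s\n", I.Name);       // prints: add store
  return 0;
}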
+// +//===----------------------------------------------------------------------===// +// +// This pass mulches exception handling code into a form adapted to code +// generation. Required if using dwarf exception handling. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/Local.h" +using namespace llvm; + +#define DEBUG_TYPE "dwarfehprepare" + +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); + +namespace { + class DwarfEHPrepare : public FunctionPass { + const TargetMachine *TM; + + // RewindFunction - _Unwind_Resume or the target equivalent. + Constant *RewindFunction; + + DominatorTree *DT; + const TargetLowering *TLI; + + bool InsertUnwindResumeCalls(Function &Fn); + Value *GetExceptionObject(ResumeInst *RI); + size_t + pruneUnreachableResumes(Function &Fn, + SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads); + + public: + static char ID; // Pass identification, replacement for typeid. + + // INITIALIZE_TM_PASS requires a default constructor, but it isn't used in + // practice. + DwarfEHPrepare() + : FunctionPass(ID), TM(nullptr), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} + + DwarfEHPrepare(const TargetMachine *TM) + : FunctionPass(ID), TM(TM), RewindFunction(nullptr), DT(nullptr), + TLI(nullptr) {} + + bool runOnFunction(Function &Fn) override; + + bool doFinalization(Module &M) override { + RewindFunction = nullptr; + return false; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { + return "Exception handling preparation"; + } + }; +} // end anonymous namespace + +char DwarfEHPrepare::ID = 0; +INITIALIZE_TM_PASS_BEGIN(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_TM_PASS_END(DwarfEHPrepare, "dwarfehprepare", + "Prepare DWARF exceptions", false, false) + +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); +} + +void DwarfEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addRequired<DominatorTreeWrapperPass>(); +} + +/// GetExceptionObject - Return the exception object from the value passed into +/// the 'resume' instruction (typically an aggregate). Clean up any dead +/// instructions, including the 'resume' instruction. 
+Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { + Value *V = RI->getOperand(0); + Value *ExnObj = nullptr; + InsertValueInst *SelIVI = dyn_cast<InsertValueInst>(V); + LoadInst *SelLoad = nullptr; + InsertValueInst *ExcIVI = nullptr; + bool EraseIVIs = false; + + if (SelIVI) { + if (SelIVI->getNumIndices() == 1 && *SelIVI->idx_begin() == 1) { + ExcIVI = dyn_cast<InsertValueInst>(SelIVI->getOperand(0)); + if (ExcIVI && isa<UndefValue>(ExcIVI->getOperand(0)) && + ExcIVI->getNumIndices() == 1 && *ExcIVI->idx_begin() == 0) { + ExnObj = ExcIVI->getOperand(1); + SelLoad = dyn_cast<LoadInst>(SelIVI->getOperand(1)); + EraseIVIs = true; + } + } + } + + if (!ExnObj) + ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); + + RI->eraseFromParent(); + + if (EraseIVIs) { + if (SelIVI->use_empty()) + SelIVI->eraseFromParent(); + if (ExcIVI->use_empty()) + ExcIVI->eraseFromParent(); + if (SelLoad && SelLoad->use_empty()) + SelLoad->eraseFromParent(); + } + + return ExnObj; +} + +/// Replace resumes that are not reachable from a cleanup landing pad with +/// unreachable and then simplify those blocks. +size_t DwarfEHPrepare::pruneUnreachableResumes( + Function &Fn, SmallVectorImpl<ResumeInst *> &Resumes, + SmallVectorImpl<LandingPadInst *> &CleanupLPads) { + BitVector ResumeReachable(Resumes.size()); + size_t ResumeIndex = 0; + for (auto *RI : Resumes) { + for (auto *LP : CleanupLPads) { + if (isPotentiallyReachable(LP, RI, DT)) { + ResumeReachable.set(ResumeIndex); + break; + } + } + ++ResumeIndex; + } + + // If everything is reachable, there is no change. + if (ResumeReachable.all()) + return Resumes.size(); + + const TargetTransformInfo &TTI = + getAnalysis<TargetTransformInfoWrapperPass>().getTTI(Fn); + LLVMContext &Ctx = Fn.getContext(); + + // Otherwise, insert unreachable instructions and call simplifycfg. + size_t ResumesLeft = 0; + for (size_t I = 0, E = Resumes.size(); I < E; ++I) { + ResumeInst *RI = Resumes[I]; + if (ResumeReachable[I]) { + Resumes[ResumesLeft++] = RI; + } else { + BasicBlock *BB = RI->getParent(); + new UnreachableInst(Ctx, RI); + RI->eraseFromParent(); + SimplifyCFG(BB, TTI, 1); + } + } + Resumes.resize(ResumesLeft); + return ResumesLeft; +} + +/// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present +/// into calls to the appropriate _Unwind_Resume function. +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { + SmallVector<ResumeInst*, 16> Resumes; + SmallVector<LandingPadInst*, 16> CleanupLPads; + for (BasicBlock &BB : Fn) { + if (auto *RI = dyn_cast<ResumeInst>(BB.getTerminator())) + Resumes.push_back(RI); + if (auto *LP = BB.getLandingPadInst()) + if (LP->isCleanup()) + CleanupLPads.push_back(LP); + } + + if (Resumes.empty()) + return false; + + // Check the personality, don't do anything if it's funclet-based. + EHPersonality Pers = classifyEHPersonality(Fn.getPersonalityFn()); + if (isFuncletEHPersonality(Pers)) + return false; + + LLVMContext &Ctx = Fn.getContext(); + + size_t ResumesLeft = pruneUnreachableResumes(Fn, Resumes, CleanupLPads); + if (ResumesLeft == 0) + return true; // We pruned them all. + + // Find the rewind function if we didn't already. + if (!RewindFunction) { + FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), + Type::getInt8PtrTy(Ctx), false); + const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); + RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); + } + + // Create the basic block where the _Unwind_Resume call will live. 
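// Illustrative sketch, not taken from the sources in this change:
// pruneUnreachableResumes() above compacts the Resumes vector in place by
// copying each survivor forward over the pruned slots and then shrinking the
// vector, which preserves relative order without any extra allocation. The
// same idiom in isolation, with std::vector<bool> standing in for a BitVector:
#include <cassert>
#include <cstddef>
#include <vector>

template <typename T>
std::size_t compactInPlace(std::vector<T> &Vals,
                           const std::vector<bool> &Keep) {
  std::size_t Left = 0;
  for (std::size_t I = 0, E = Vals.size(); I != E; ++I)
    if (Keep[I])
      Vals[Left++] = Vals[I];   // move the survivor forward, keeping order
  Vals.resize(Left);
  return Left;
}

int main() {
  std::vector<int> V = {10, 11, 12, 13};
  std::size_t N = compactInPlace(V, {true, false, false, true});
  assert(N == 2 && V[0] == 10 && V[1] == 13);
  return 0;
}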
+ if (ResumesLeft == 1) { + // Instead of creating a new BB and PHI node, just append the call to + // _Unwind_Resume to the end of the single resume block. + ResumeInst *RI = Resumes.front(); + BasicBlock *UnwindBB = RI->getParent(); + Value *ExnObj = GetExceptionObject(RI); + + // Call the _Unwind_Resume function. + CallInst *CI = CallInst::Create(RewindFunction, ExnObj, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. + new UnreachableInst(Ctx, UnwindBB); + return true; + } + + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); + PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), ResumesLeft, + "exn.obj", UnwindBB); + + // Extract the exception object from the ResumeInst and add it to the PHI node + // that feeds the _Unwind_Resume call. + for (ResumeInst *RI : Resumes) { + BasicBlock *Parent = RI->getParent(); + BranchInst::Create(UnwindBB, Parent); + + Value *ExnObj = GetExceptionObject(RI); + PN->addIncoming(ExnObj, Parent); + + ++NumResumesLowered; + } + + // Call the function. + CallInst *CI = CallInst::Create(RewindFunction, PN, "", UnwindBB); + CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); + + // We never expect _Unwind_Resume to return. + new UnreachableInst(Ctx, UnwindBB); + return true; +} + +bool DwarfEHPrepare::runOnFunction(Function &Fn) { + assert(TM && "DWARF EH preparation requires a target machine"); + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + bool Changed = InsertUnwindResumeCalls(Fn); + DT = nullptr; + TLI = nullptr; + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp new file mode 100644 index 0000000..f3536d7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp @@ -0,0 +1,814 @@ +//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Early if-conversion is for out-of-order CPUs that don't have a lot of +// predicable instructions. The goal is to eliminate conditional branches that +// may mispredict. +// +// Instructions from both sides of the branch are executed specutatively, and a +// cmov instruction selects the result. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "early-ifcvt" + +// Absolute maximum number of instructions allowed per speculated block. +// This bypasses all other heuristics, so it should be set fairly high. +static cl::opt<unsigned> +BlockInstrLimit("early-ifcvt-limit", cl::init(30), cl::Hidden, + cl::desc("Maximum number of instructions per speculated block.")); + +// Stress testing mode - disable heuristics. +static cl::opt<bool> Stress("stress-early-ifcvt", cl::Hidden, + cl::desc("Turn all knobs to 11")); + +STATISTIC(NumDiamondsSeen, "Number of diamonds"); +STATISTIC(NumDiamondsConv, "Number of diamonds converted"); +STATISTIC(NumTrianglesSeen, "Number of triangles"); +STATISTIC(NumTrianglesConv, "Number of triangles converted"); + +//===----------------------------------------------------------------------===// +// SSAIfConv +//===----------------------------------------------------------------------===// +// +// The SSAIfConv class performs if-conversion on SSA form machine code after +// determining if it is possible. The class contains no heuristics; external +// code should be used to determine when if-conversion is a good idea. +// +// SSAIfConv can convert both triangles and diamonds: +// +// Triangle: Head Diamond: Head +// | \ / \_ +// | \ / | +// | [TF]BB FBB TBB +// | / \ / +// | / \ / +// Tail Tail +// +// Instructions in the conditional blocks TBB and/or FBB are spliced into the +// Head block, and phis in the Tail block are converted to select instructions. +// +namespace { +class SSAIfConv { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + +public: + /// The block containing the conditional branch. + MachineBasicBlock *Head; + + /// The block containing phis after the if-then-else. + MachineBasicBlock *Tail; + + /// The 'true' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *TBB; + + /// The 'false' conditional block as determined by AnalyzeBranch. + MachineBasicBlock *FBB; + + /// isTriangle - When there is no 'else' block, either TBB or FBB will be + /// equal to Tail. + bool isTriangle() const { return TBB == Tail || FBB == Tail; } + + /// Returns the Tail predecessor for the True side. + MachineBasicBlock *getTPred() const { return TBB == Tail ? Head : TBB; } + + /// Returns the Tail predecessor for the False side. + MachineBasicBlock *getFPred() const { return FBB == Tail ? Head : FBB; } + + /// Information about each phi in the Tail block. + struct PHIInfo { + MachineInstr *PHI; + unsigned TReg, FReg; + // Latencies from Cond+Branch, TReg, and FReg to DstReg. 
+ int CondCycles, TCycles, FCycles; + + PHIInfo(MachineInstr *phi) + : PHI(phi), TReg(0), FReg(0), CondCycles(0), TCycles(0), FCycles(0) {} + }; + + SmallVector<PHIInfo, 8> PHIs; + +private: + /// The branch condition determined by AnalyzeBranch. + SmallVector<MachineOperand, 4> Cond; + + /// Instructions in Head that define values used by the conditional blocks. + /// The hoisted instructions must be inserted after these instructions. + SmallPtrSet<MachineInstr*, 8> InsertAfter; + + /// Register units clobbered by the conditional blocks. + BitVector ClobberedRegUnits; + + // Scratch pad for findInsertionPoint. + SparseSet<unsigned> LiveRegUnits; + + /// Insertion point in Head for speculatively executed instructions form TBB + /// and FBB. + MachineBasicBlock::iterator InsertionPoint; + + /// Return true if all non-terminator instructions in MBB can be safely + /// speculated. + bool canSpeculateInstrs(MachineBasicBlock *MBB); + + /// Find a valid insertion point in Head. + bool findInsertionPoint(); + + /// Replace PHI instructions in Tail with selects. + void replacePHIInstrs(); + + /// Insert selects and rewrite PHI operands to use them. + void rewritePHIOperands(); + +public: + /// runOnMachineFunction - Initialize per-function data structures. + void runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + LiveRegUnits.clear(); + LiveRegUnits.setUniverse(TRI->getNumRegUnits()); + ClobberedRegUnits.clear(); + ClobberedRegUnits.resize(TRI->getNumRegUnits()); + } + + /// canConvertIf - If the sub-CFG headed by MBB can be if-converted, + /// initialize the internal state, and return true. + bool canConvertIf(MachineBasicBlock *MBB); + + /// convertIf - If-convert the last block passed to canConvertIf(), assuming + /// it is possible. Add any erased blocks to RemovedBlocks. + void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks); +}; +} // end anonymous namespace + + +/// canSpeculateInstrs - Returns true if all the instructions in MBB can safely +/// be speculated. The terminators are not considered. +/// +/// If instructions use any values that are defined in the head basic block, +/// the defining instructions are added to InsertAfter. +/// +/// Any clobbered regunits are added to ClobberedRegUnits. +/// +bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) { + // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to + // get right. + if (!MBB->livein_empty()) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has live-ins.\n"); + return false; + } + + unsigned InstrCount = 0; + + // Check all instructions, except the terminators. It is assumed that + // terminators never have side effects or define any used register values. + for (MachineBasicBlock::iterator I = MBB->begin(), + E = MBB->getFirstTerminator(); I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (++InstrCount > BlockInstrLimit && !Stress) { + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " has more than " + << BlockInstrLimit << " instructions.\n"); + return false; + } + + // There shouldn't normally be any phis in a single-predecessor block. + if (I->isPHI()) { + DEBUG(dbgs() << "Can't hoist: " << *I); + return false; + } + + // Don't speculate loads. Note that it may be possible and desirable to + // speculate GOT or constant pool loads that are guaranteed not to trap, + // but we don't support that for now. 
+ if (I->mayLoad()) { + DEBUG(dbgs() << "Won't speculate load: " << *I); + return false; + } + + // We never speculate stores, so an AA pointer isn't necessary. + bool DontMoveAcrossStore = true; + if (!I->isSafeToMove(nullptr, DontMoveAcrossStore)) { + DEBUG(dbgs() << "Can't speculate: " << *I); + return false; + } + + // Check for any dependencies on Head instructions. + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask()) { + DEBUG(dbgs() << "Won't speculate regmask: " << *I); + return false; + } + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + + // Remember clobbered regunits. + if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + ClobberedRegUnits.set(*Units); + + if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI || DefMI->getParent() != Head) + continue; + if (InsertAfter.insert(DefMI).second) + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " depends on " << *DefMI); + if (DefMI->isTerminator()) { + DEBUG(dbgs() << "Can't insert instructions below terminator.\n"); + return false; + } + } + } + return true; +} + + +/// Find an insertion point in Head for the speculated instructions. The +/// insertion point must be: +/// +/// 1. Before any terminators. +/// 2. After any instructions in InsertAfter. +/// 3. Not have any clobbered regunits live. +/// +/// This function sets InsertionPoint and returns true when successful, it +/// returns false if no valid insertion point could be found. +/// +bool SSAIfConv::findInsertionPoint() { + // Keep track of live regunits before the current position. + // Only track RegUnits that are also in ClobberedRegUnits. + LiveRegUnits.clear(); + SmallVector<unsigned, 8> Reads; + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + MachineBasicBlock::iterator I = Head->end(); + MachineBasicBlock::iterator B = Head->begin(); + while (I != B) { + --I; + // Some of the conditional code depends in I. + if (InsertAfter.count(I)) { + DEBUG(dbgs() << "Can't insert code after " << *I); + return false; + } + + // Update live regunits. + for (const MachineOperand &MO : I->operands()) { + // We're ignoring regmask operands. That is conservatively correct. + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // I clobbers Reg, so it isn't live before I. + if (MO.isDef()) + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + LiveRegUnits.erase(*Units); + // Unless I reads Reg. + if (MO.readsReg()) + Reads.push_back(Reg); + } + // Anything read by I is live before I. + while (!Reads.empty()) + for (MCRegUnitIterator Units(Reads.pop_back_val(), TRI); Units.isValid(); + ++Units) + if (ClobberedRegUnits.test(*Units)) + LiveRegUnits.insert(*Units); + + // We can't insert before a terminator. + if (I != FirstTerm && I->isTerminator()) + continue; + + // Some of the clobbered registers are live before I, not a valid insertion + // point. + if (!LiveRegUnits.empty()) { + DEBUG({ + dbgs() << "Would clobber"; + for (SparseSet<unsigned>::const_iterator + i = LiveRegUnits.begin(), e = LiveRegUnits.end(); i != e; ++i) + dbgs() << ' ' << PrintRegUnit(*i, TRI); + dbgs() << " live before " << *I; + }); + continue; + } + + // This is a valid insertion point. 
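// Illustrative sketch, not taken from the sources in this change: the scan
// performed by findInsertionPoint(), reduced to a toy. Walk the block bottom
// up, keep the subset of watched (clobbered) registers that are live before
// the current instruction, and report the first position where that set is
// empty. Insn, the register numbers and the block contents are invented for
// the example.
#include <cstdio>
#include <set>
#include <vector>

struct Insn {
  std::vector<int> Defs, Uses;
};

// Return the largest index I such that inserting right before Block[I] does
// not land inside the live range of any watched register, or -1 if none.
int findInsertBefore(const std::vector<Insn> &Block,
                     const std::set<int> &Watched) {
  std::set<int> Live;                  // watched regs live before Block[I]
  for (int I = (int)Block.size() - 1; I >= 0; --I) {
    for (int D : Block[I].Defs)
      Live.erase(D);                   // defined here, so dead above this point
    for (int U : Block[I].Uses)
      if (Watched.count(U))
        Live.insert(U);                // read here, so live above this point
    if (Live.empty())
      return I;                        // safe to insert right before Block[I]
  }
  return -1;                           // no safe point in this block
}

int main() {
  // Register 1 is watched. It is defined by instruction 1 and read by
  // instructions 2 and 3, so the only safe point is in front of instruction 1.
  std::vector<Insn> Block = {{{2}, {}}, {{1}, {}}, {{}, {1}}, {{}, {1}}};
  std::printf("%d\n", findInsertBefore(Block, {1}));   // prints: 1
  return 0;
}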
+ InsertionPoint = I; + DEBUG(dbgs() << "Can insert before " << *I); + return true; + } + DEBUG(dbgs() << "No legal insertion point found.\n"); + return false; +} + + + +/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is +/// a potential candidate for if-conversion. Fill out the internal state. +/// +bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) { + Head = MBB; + TBB = FBB = Tail = nullptr; + + if (Head->succ_size() != 2) + return false; + MachineBasicBlock *Succ0 = Head->succ_begin()[0]; + MachineBasicBlock *Succ1 = Head->succ_begin()[1]; + + // Canonicalize so Succ0 has MBB as its single predecessor. + if (Succ0->pred_size() != 1) + std::swap(Succ0, Succ1); + + if (Succ0->pred_size() != 1 || Succ0->succ_size() != 1) + return false; + + Tail = Succ0->succ_begin()[0]; + + // This is not a triangle. + if (Tail != Succ1) { + // Check for a diamond. We won't deal with any critical edges. + if (Succ1->pred_size() != 1 || Succ1->succ_size() != 1 || + Succ1->succ_begin()[0] != Tail) + return false; + DEBUG(dbgs() << "\nDiamond: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << "/BB#" << Succ1->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + + // Live-in physregs are tricky to get right when speculating code. + if (!Tail->livein_empty()) { + DEBUG(dbgs() << "Tail has live-ins.\n"); + return false; + } + } else { + DEBUG(dbgs() << "\nTriangle: BB#" << Head->getNumber() + << " -> BB#" << Succ0->getNumber() + << " -> BB#" << Tail->getNumber() << '\n'); + } + + // This is a triangle or a diamond. + // If Tail doesn't have any phis, there must be side effects. + if (Tail->empty() || !Tail->front().isPHI()) { + DEBUG(dbgs() << "No phis in tail.\n"); + return false; + } + + // The branch we're looking to eliminate must be analyzable. + Cond.clear(); + if (TII->AnalyzeBranch(*Head, TBB, FBB, Cond)) { + DEBUG(dbgs() << "Branch not analyzable.\n"); + return false; + } + + // This is weird, probably some sort of degenerate CFG. + if (!TBB) { + DEBUG(dbgs() << "AnalyzeBranch didn't find conditional branch.\n"); + return false; + } + + // AnalyzeBranch doesn't set FBB on a fall-through branch. + // Make sure it is always set. + FBB = TBB == Succ0 ? Succ1 : Succ0; + + // Any phis in the tail block must be convertible to selects. + PHIs.clear(); + MachineBasicBlock *TPred = getTPred(); + MachineBasicBlock *FPred = getFPred(); + for (MachineBasicBlock::iterator I = Tail->begin(), E = Tail->end(); + I != E && I->isPHI(); ++I) { + PHIs.push_back(&*I); + PHIInfo &PI = PHIs.back(); + // Find PHI operands corresponding to TPred and FPred. + for (unsigned i = 1; i != PI.PHI->getNumOperands(); i += 2) { + if (PI.PHI->getOperand(i+1).getMBB() == TPred) + PI.TReg = PI.PHI->getOperand(i).getReg(); + if (PI.PHI->getOperand(i+1).getMBB() == FPred) + PI.FReg = PI.PHI->getOperand(i).getReg(); + } + assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI"); + assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI"); + + // Get target information. + if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg, + PI.CondCycles, PI.TCycles, PI.FCycles)) { + DEBUG(dbgs() << "Can't convert: " << *PI.PHI); + return false; + } + } + + // Check that the conditional instructions can be speculated. 
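// Illustrative sketch, not taken from the sources in this change: the shape
// test at the top of canConvertIf() above, reduced to a toy CFG of integer
// block ids with explicit successor and predecessor lists. It mirrors the
// same canonicalization: Succ0 is forced to be the side whose only
// predecessor is Head, a triangle is the case where the other successor is
// already the tail, and a diamond needs a second single-entry, single-exit
// side block that rejoins at the same tail. Block, Shape and the sample CFGs
// are invented for the example.
#include <cstdio>
#include <utility>
#include <vector>

struct Block {
  std::vector<int> Succs, Preds;
};

enum class Shape { None, Triangle, Diamond };

Shape classify(const std::vector<Block> &CFG, int Head) {
  if (CFG[Head].Succs.size() != 2)
    return Shape::None;
  int Succ0 = CFG[Head].Succs[0], Succ1 = CFG[Head].Succs[1];
  if (CFG[Succ0].Preds.size() != 1)
    std::swap(Succ0, Succ1);                 // canonicalize on the single-pred side
  if (CFG[Succ0].Preds.size() != 1 || CFG[Succ0].Succs.size() != 1)
    return Shape::None;
  int Tail = CFG[Succ0].Succs[0];
  if (Tail == Succ1)
    return Shape::Triangle;                  // Head -> Succ0 -> Tail == Succ1
  if (CFG[Succ1].Preds.size() == 1 && CFG[Succ1].Succs.size() == 1 &&
      CFG[Succ1].Succs[0] == Tail)
    return Shape::Diamond;                   // both sides rejoin at Tail
  return Shape::None;
}

int main() {
  // 0 -> {1, 2}, 1 -> 2: a triangle whose tail is block 2.
  std::vector<Block> Triangle = {{{1, 2}, {}}, {{2}, {0}}, {{}, {0, 1}}};
  // 0 -> {1, 2}, 1 -> 3, 2 -> 3: a diamond whose tail is block 3.
  std::vector<Block> Diamond = {
      {{1, 2}, {}}, {{3}, {0}}, {{3}, {0}}, {{}, {1, 2}}};
  std::printf("%d %d\n", static_cast<int>(classify(Triangle, 0)),
              static_cast<int>(classify(Diamond, 0)));   // prints: 1 2
  return 0;
}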
+ InsertAfter.clear(); + ClobberedRegUnits.reset(); + if (TBB != Tail && !canSpeculateInstrs(TBB)) + return false; + if (FBB != Tail && !canSpeculateInstrs(FBB)) + return false; + + // Try to find a valid insertion point for the speculated instructions in the + // head basic block. + if (!findInsertionPoint()) + return false; + + if (isTriangle()) + ++NumTrianglesSeen; + else + ++NumDiamondsSeen; + return true; +} + +/// replacePHIInstrs - Completely replace PHI instructions with selects. +/// This is possible when the only Tail predecessors are the if-converted +/// blocks. +void SSAIfConv::replacePHIInstrs() { + assert(Tail->pred_size() == 2 && "Cannot replace PHIs"); + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + DEBUG(dbgs() << "If-converting " << *PI.PHI); + unsigned DstReg = PI.PHI->getOperand(0).getReg(); + TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + PI.PHI->eraseFromParent(); + PI.PHI = nullptr; + } +} + +/// rewritePHIOperands - When there are additional Tail predecessors, insert +/// select instructions in Head and rewrite PHI operands to use the selects. +/// Keep the PHI instructions in Tail to handle the other predecessors. +void SSAIfConv::rewritePHIOperands() { + MachineBasicBlock::iterator FirstTerm = Head->getFirstTerminator(); + assert(FirstTerm != Head->end() && "No terminators"); + DebugLoc HeadDL = FirstTerm->getDebugLoc(); + + // Convert all PHIs to select instructions inserted before FirstTerm. + for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { + PHIInfo &PI = PHIs[i]; + unsigned DstReg = 0; + + DEBUG(dbgs() << "If-converting " << *PI.PHI); + if (PI.TReg == PI.FReg) { + // We do not need the select instruction if both incoming values are + // equal. + DstReg = PI.TReg; + } else { + unsigned PHIDst = PI.PHI->getOperand(0).getReg(); + DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst)); + TII->insertSelect(*Head, FirstTerm, HeadDL, + DstReg, Cond, PI.TReg, PI.FReg); + DEBUG(dbgs() << " --> " << *std::prev(FirstTerm)); + } + + // Rewrite PHI operands TPred -> (DstReg, Head), remove FPred. + for (unsigned i = PI.PHI->getNumOperands(); i != 1; i -= 2) { + MachineBasicBlock *MBB = PI.PHI->getOperand(i-1).getMBB(); + if (MBB == getTPred()) { + PI.PHI->getOperand(i-1).setMBB(Head); + PI.PHI->getOperand(i-2).setReg(DstReg); + } else if (MBB == getFPred()) { + PI.PHI->RemoveOperand(i-1); + PI.PHI->RemoveOperand(i-2); + } + } + DEBUG(dbgs() << " --> " << *PI.PHI); + } +} + +/// convertIf - Execute the if conversion after canConvertIf has determined the +/// feasibility. +/// +/// Any basic blocks erased will be added to RemovedBlocks. +/// +void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) { + assert(Head && Tail && TBB && FBB && "Call canConvertIf first."); + + // Update statistics. + if (isTriangle()) + ++NumTrianglesConv; + else + ++NumDiamondsConv; + + // Move all instructions into Head, except for the terminators. + if (TBB != Tail) + Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator()); + if (FBB != Tail) + Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator()); + + // Are there extra Tail predecessors? 
+ bool ExtraPreds = Tail->pred_size() != 2; + if (ExtraPreds) + rewritePHIOperands(); + else + replacePHIInstrs(); + + // Fix up the CFG, temporarily leave Head without any successors. + Head->removeSuccessor(TBB); + Head->removeSuccessor(FBB, true); + if (TBB != Tail) + TBB->removeSuccessor(Tail, true); + if (FBB != Tail) + FBB->removeSuccessor(Tail, true); + + // Fix up Head's terminators. + // It should become a single branch or a fallthrough. + DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc(); + TII->RemoveBranch(*Head); + + // Erase the now empty conditional blocks. It is likely that Head can fall + // through to Tail, and we can join the two blocks. + if (TBB != Tail) { + RemovedBlocks.push_back(TBB); + TBB->eraseFromParent(); + } + if (FBB != Tail) { + RemovedBlocks.push_back(FBB); + FBB->eraseFromParent(); + } + + assert(Head->succ_empty() && "Additional head successors?"); + if (!ExtraPreds && Head->isLayoutSuccessor(Tail)) { + // Splice Tail onto the end of Head. + DEBUG(dbgs() << "Joining tail BB#" << Tail->getNumber() + << " into head BB#" << Head->getNumber() << '\n'); + Head->splice(Head->end(), Tail, + Tail->begin(), Tail->end()); + Head->transferSuccessorsAndUpdatePHIs(Tail); + RemovedBlocks.push_back(Tail); + Tail->eraseFromParent(); + } else { + // We need a branch to Tail, let code placement work it out later. + DEBUG(dbgs() << "Converting to unconditional branch.\n"); + SmallVector<MachineOperand, 0> EmptyCond; + TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL); + Head->addSuccessor(Tail); + } + DEBUG(dbgs() << *Head); +} + + +//===----------------------------------------------------------------------===// +// EarlyIfConverter Pass +//===----------------------------------------------------------------------===// + +namespace { +class EarlyIfConverter : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MCSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + SSAIfConv IfConv; + +public: + static char ID; + EarlyIfConverter() : MachineFunctionPass(ID) {} + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Early If-Conversion"; } + +private: + bool tryConvertIf(MachineBasicBlock*); + void updateDomTree(ArrayRef<MachineBasicBlock*> Removed); + void updateLoops(ArrayRef<MachineBasicBlock*> Removed); + void invalidateTraces(); + bool shouldConvertIf(); +}; +} // end anonymous namespace + +char EarlyIfConverter::ID = 0; +char &llvm::EarlyIfConverterID = EarlyIfConverter::ID; + +INITIALIZE_PASS_BEGIN(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(EarlyIfConverter, + "early-ifcvt", "Early If Converter", false, false) + +void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Update the dominator tree after if-conversion 
erased some blocks. +void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) { + // convertIf can remove TBB, FBB, and Tail can be merged into Head. + // TBB and FBB should not dominate any blocks. + // Tail children should be transferred to Head. + MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head); + for (unsigned i = 0, e = Removed.size(); i != e; ++i) { + MachineDomTreeNode *Node = DomTree->getNode(Removed[i]); + assert(Node != HeadNode && "Cannot erase the head node"); + while (Node->getNumChildren()) { + assert(Node->getBlock() == IfConv.Tail && "Unexpected children"); + DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode); + } + DomTree->eraseNode(Removed[i]); + } +} + +/// Update LoopInfo after if-conversion. +void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) { + if (!Loops) + return; + // If-conversion doesn't change loop structure, and it doesn't mess with back + // edges, so updating LoopInfo is simply removing the dead blocks. + for (unsigned i = 0, e = Removed.size(); i != e; ++i) + Loops->removeBlock(Removed[i]); +} + +/// Invalidate MachineTraceMetrics before if-conversion. +void EarlyIfConverter::invalidateTraces() { + Traces->verifyAnalysis(); + Traces->invalidate(IfConv.Head); + Traces->invalidate(IfConv.Tail); + Traces->invalidate(IfConv.TBB); + Traces->invalidate(IfConv.FBB); + Traces->verifyAnalysis(); +} + +// Adjust cycles with downward saturation. +static unsigned adjCycles(unsigned Cyc, int Delta) { + if (Delta < 0 && Cyc + Delta > Cyc) + return 0; + return Cyc + Delta; +} + +/// Apply cost model and heuristics to the if-conversion in IfConv. +/// Return true if the conversion is a good idea. +/// +bool EarlyIfConverter::shouldConvertIf() { + // Stress testing mode disables all cost considerations. + if (Stress) + return true; + + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + + MachineTraceMetrics::Trace TBBTrace = MinInstr->getTrace(IfConv.getTPred()); + MachineTraceMetrics::Trace FBBTrace = MinInstr->getTrace(IfConv.getFPred()); + DEBUG(dbgs() << "TBB: " << TBBTrace << "FBB: " << FBBTrace); + unsigned MinCrit = std::min(TBBTrace.getCriticalPath(), + FBBTrace.getCriticalPath()); + + // Set a somewhat arbitrary limit on the critical path extension we accept. + unsigned CritLimit = SchedModel.MispredictPenalty/2; + + // If-conversion only makes sense when there is unexploited ILP. Compute the + // maximum-ILP resource length of the trace after if-conversion. Compare it + // to the shortest critical path. + SmallVector<const MachineBasicBlock*, 1> ExtraBlocks; + if (IfConv.TBB != IfConv.Tail) + ExtraBlocks.push_back(IfConv.TBB); + unsigned ResLength = FBBTrace.getResourceLength(ExtraBlocks); + DEBUG(dbgs() << "Resource length " << ResLength + << ", minimal critical path " << MinCrit << '\n'); + if (ResLength > MinCrit + CritLimit) { + DEBUG(dbgs() << "Not enough available ILP.\n"); + return false; + } + + // Assume that the depth of the first head terminator will also be the depth + // of the select instruction inserted, as determined by the flag dependency. + // TBB / FBB data dependencies may delay the select even more. 
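// Illustrative sketch, not taken from the sources in this change: the
// arithmetic behind the per-phi test just below, in isolation. Take the depth
// an operand would have after if-conversion (the depth of its source plus the
// select latency, saturating at zero), and accept the conversion only if that
// exceeds the phi's tolerated depth (its current depth plus its slack) by at
// most CritLimit cycles. The helper names and the numbers in main() are
// invented for the example.
#include <cstdio>

// Adjust a cycle count by a possibly negative latency, saturating at zero;
// this restates the wrap-around check of adjCycles() above in another form.
static unsigned adjCyclesToy(unsigned Cyc, int Delta) {
  if (Delta < 0 && Cyc < static_cast<unsigned>(-Delta))
    return 0;
  return Cyc + Delta;
}

// True if a value of depth SrcDepth, routed through a select with latency
// SelCycles, stays within CritLimit of what the phi already tolerates.
static bool fitsCriticalPath(unsigned SrcDepth, int SelCycles,
                             unsigned PHIDepth, unsigned Slack,
                             unsigned CritLimit) {
  unsigned NewDepth = adjCyclesToy(SrcDepth, SelCycles);
  unsigned MaxDepth = PHIDepth + Slack;
  return NewDepth <= MaxDepth || NewDepth - MaxDepth <= CritLimit;
}

int main() {
  // A misprediction penalty of 14 cycles would give CritLimit = 14 / 2 = 7.
  std::printf("%d\n", fitsCriticalPath(10, 2, 9, 1, 7));   // extension 2:  1
  std::printf("%d\n", fitsCriticalPath(20, 3, 9, 1, 7));   // extension 13: 0
  return 0;
}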
+ MachineTraceMetrics::Trace HeadTrace = MinInstr->getTrace(IfConv.Head); + unsigned BranchDepth = + HeadTrace.getInstrCycles(IfConv.Head->getFirstTerminator()).Depth; + DEBUG(dbgs() << "Branch depth: " << BranchDepth << '\n'); + + // Look at all the tail phis, and compute the critical path extension caused + // by inserting select instructions. + MachineTraceMetrics::Trace TailTrace = MinInstr->getTrace(IfConv.Tail); + for (unsigned i = 0, e = IfConv.PHIs.size(); i != e; ++i) { + SSAIfConv::PHIInfo &PI = IfConv.PHIs[i]; + unsigned Slack = TailTrace.getInstrSlack(PI.PHI); + unsigned MaxDepth = Slack + TailTrace.getInstrCycles(PI.PHI).Depth; + DEBUG(dbgs() << "Slack " << Slack << ":\t" << *PI.PHI); + + // The condition is pulled into the critical path. + unsigned CondDepth = adjCycles(BranchDepth, PI.CondCycles); + if (CondDepth > MaxDepth) { + unsigned Extra = CondDepth - MaxDepth; + DEBUG(dbgs() << "Condition adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The TBB value is pulled into the critical path. + unsigned TDepth = adjCycles(TBBTrace.getPHIDepth(PI.PHI), PI.TCycles); + if (TDepth > MaxDepth) { + unsigned Extra = TDepth - MaxDepth; + DEBUG(dbgs() << "TBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + + // The FBB value is pulled into the critical path. + unsigned FDepth = adjCycles(FBBTrace.getPHIDepth(PI.PHI), PI.FCycles); + if (FDepth > MaxDepth) { + unsigned Extra = FDepth - MaxDepth; + DEBUG(dbgs() << "FBB data adds " << Extra << " cycles.\n"); + if (Extra > CritLimit) { + DEBUG(dbgs() << "Exceeds limit of " << CritLimit << '\n'); + return false; + } + } + } + return true; +} + +/// Attempt repeated if-conversion on MBB, return true if successful. +/// +bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) { + bool Changed = false; + while (IfConv.canConvertIf(MBB) && shouldConvertIf()) { + // If-convert MBB and update analyses. + invalidateTraces(); + SmallVector<MachineBasicBlock*, 4> RemovedBlocks; + IfConv.convertIf(RemovedBlocks); + Changed = true; + updateDomTree(RemovedBlocks); + updateLoops(RemovedBlocks); + } + return Changed; +} + +bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** EARLY IF-CONVERSION **********\n" + << "********** Function: " << MF.getName() << '\n'); + // Only run if conversion if the target wants it. + const TargetSubtargetInfo &STI = MF.getSubtarget(); + if (!STI.enableEarlyIfConversion()) + return false; + + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); + MRI = &MF.getRegInfo(); + DomTree = &getAnalysis<MachineDominatorTree>(); + Loops = getAnalysisIfAvailable<MachineLoopInfo>(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = nullptr; + + bool Changed = false; + IfConv.runOnMachineFunction(MF); + + // Visit blocks in dominator tree post-order. The post-order enables nested + // if-conversion in a single pass. The tryConvertIf() function may erase + // blocks, but only blocks dominated by the head block. This makes it safe to + // update the dominator tree while the post-order iterator is still active. 
+ for (auto DomNode : post_order(DomTree)) + if (tryConvertIf(DomNode->getBlock())) + Changed = true; + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp new file mode 100644 index 0000000..aea7c31 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp @@ -0,0 +1,97 @@ +//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the implementation of the EdgeBundles analysis. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +static cl::opt<bool> +ViewEdgeBundles("view-edge-bundles", cl::Hidden, + cl::desc("Pop up a window to show edge bundle graphs")); + +char EdgeBundles::ID = 0; + +INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges", + /* cfg = */true, /* analysis = */ true) + +char &llvm::EdgeBundlesID = EdgeBundles::ID; + +void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + EC.clear(); + EC.grow(2 * MF->getNumBlockIDs()); + + for (const auto &MBB : *MF) { + unsigned OutE = 2 * MBB.getNumber() + 1; + // Join the outgoing bundle with the ingoing bundles of all successors. + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + EC.join(OutE, 2 * (*SI)->getNumber()); + } + EC.compress(); + if (ViewEdgeBundles) + view(); + + // Compute the reverse mapping. + Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + + return false; +} + +/// Specialize WriteGraph, the standard implementation won't work. +namespace llvm { +template<> +raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G, + bool ShortNames, + const Twine &Title) { + const MachineFunction *MF = G.getMachineFunction(); + + O << "digraph {\n"; + for (const auto &MBB : *MF) { + unsigned BB = MBB.getNumber(); + O << "\t\"BB#" << BB << "\" [ shape=box ]\n" + << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n" + << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n'; + for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), + SE = MBB.succ_end(); SI != SE; ++SI) + O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber() + << "\" [ color=lightgray ]\n"; + } + O << "}\n"; + return O; +} +} + +/// view - Visualize the annotated bipartite CFG with Graphviz. 
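// Illustrative sketch, not taken from the sources in this change: the bundle
// numbering used by runOnMachineFunction() above. Block B owns endpoint 2*B
// for its ingoing edges and 2*B+1 for its outgoing edges, and every CFG edge
// B -> S joins 2*B+1 with 2*S, so an edge always lands in the same bundle as
// the other edges leaving B and the other edges entering S. A small
// union-find stands in for IntEqClasses; the UnionFind type and the sample
// CFG are invented for the example.
#include <cstdio>
#include <numeric>
#include <vector>

struct UnionFind {
  std::vector<unsigned> Parent;
  explicit UnionFind(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];   // path halving
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};

int main() {
  // A diamond CFG over four blocks: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3.
  const unsigned Edges[][2] = {{0, 1}, {0, 2}, {1, 3}, {2, 3}};
  UnionFind EC(2 * 4);
  for (const auto &E : Edges)
    EC.join(2 * E[0] + 1, 2 * E[1]);       // out(B) joins in(S)
  // out(1) and out(2) were both joined with in(3): same bundle.
  std::printf("%d\n", EC.find(2 * 1 + 1) == EC.find(2 * 2 + 1)); // prints 1
  // out(0) was joined with in(1) by the edge 0 -> 1.
  std::printf("%d\n", EC.find(2 * 0 + 1) == EC.find(2 * 1));     // prints 1
  // The head's outgoing bundle and the tail's ingoing bundle stay distinct.
  std::printf("%d\n", EC.find(2 * 0 + 1) == EC.find(2 * 3));     // prints 0
  return 0;
}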
+void EdgeBundles::view() const { + ViewGraph(*this, "EdgeBundles"); +} diff --git a/contrib/llvm/lib/CodeGen/ErlangGC.cpp b/contrib/llvm/lib/CodeGen/ErlangGC.cpp new file mode 100644 index 0000000..024946d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ErlangGC.cpp @@ -0,0 +1,46 @@ +//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Erlang/OTP runtime-compatible garbage collector +// (e.g. defines safe points, root initialization etc.) +// +// The frametable emitter is in ErlangGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +class ErlangGC : public GCStrategy { +public: + ErlangGC(); +}; +} + +static GCRegistry::Add<ErlangGC> X("erlang", + "erlang-compatible garbage collector"); + +void llvm::linkErlangGC() {} + +ErlangGC::ErlangGC() { + InitRoots = false; + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + CustomRoots = false; +} diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp new file mode 100644 index 0000000..c550008 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -0,0 +1,802 @@ +//===- ExecutionDepsFix.cpp - Fix execution dependecy issues ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the execution dependency fix pass. +// +// Some X86 SSE instructions like mov, and, or, xor are available in different +// variants for different operand types. These variant instructions are +// equivalent, but on Nehalem and newer cpus there is extra latency +// transferring data between integer and floating point domains. ARM cores +// have similar issues when they are configured with both VFP and NEON +// pipelines. +// +// This pass changes the variant instructions to minimize domain crossings. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "execution-fix" + +/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track +/// of execution domains. +/// +/// An open DomainValue represents a set of instructions that can still switch +/// execution domain. 
Multiple registers may refer to the same open +/// DomainValue - they will eventually be collapsed to the same execution +/// domain. +/// +/// A collapsed DomainValue represents a single register that has been forced +/// into one of more execution domains. There is a separate collapsed +/// DomainValue for each register, but it may contain multiple execution +/// domains. A register value is initially created in a single execution +/// domain, but if we were forced to pay the penalty of a domain crossing, we +/// keep track of the fact that the register is now available in multiple +/// domains. +namespace { +struct DomainValue { + // Basic reference counting. + unsigned Refs; + + // Bitmask of available domains. For an open DomainValue, it is the still + // possible domains for collapsing. For a collapsed DomainValue it is the + // domains where the register is available for free. + unsigned AvailableDomains; + + // Pointer to the next DomainValue in a chain. When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by following the chain. + DomainValue *Next; + + // Twiddleable instructions using or defining these registers. + SmallVector<MachineInstr*, 8> Instrs; + + // A collapsed DomainValue has no instructions to twiddle - it simply keeps + // track of the domains where the registers are already available. + bool isCollapsed() const { return Instrs.empty(); } + + // Is domain available? + bool hasDomain(unsigned domain) const { + assert(domain < + static_cast<unsigned>(std::numeric_limits<unsigned>::digits) && + "undefined behavior"); + return AvailableDomains & (1u << domain); + } + + // Mark domain as available. + void addDomain(unsigned domain) { + AvailableDomains |= 1u << domain; + } + + // Restrict to a single domain available. + void setSingleDomain(unsigned domain) { + AvailableDomains = 1u << domain; + } + + // Return bitmask of domains that are available and in mask. + unsigned getCommonDomains(unsigned mask) const { + return AvailableDomains & mask; + } + + // First domain available. + unsigned getFirstDomain() const { + return countTrailingZeros(AvailableDomains); + } + + DomainValue() : Refs(0) { clear(); } + + // Clear this DomainValue and point to next which has all its data. + void clear() { + AvailableDomains = 0; + Next = nullptr; + Instrs.clear(); + } +}; +} + +namespace { +/// Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonymous namespace + +namespace { +class ExeDepsFix : public MachineFunctionPass { + static char ID; + SpecificBumpPtrAllocator<DomainValue> Allocator; + SmallVector<DomainValue*,16> Avail; + + const TargetRegisterClass *const RC; + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + std::vector<SmallVector<int, 1>> AliasMap; + const unsigned NumRegs; + LiveReg *LiveRegs; + typedef DenseMap<MachineBasicBlock*, LiveReg*> LiveOutMap; + LiveOutMap LiveOuts; + + /// List of undefined register reads in this block in forward order. 
+ std::vector<std::pair<MachineInstr*, unsigned> > UndefReads; + + /// Storage for register unit liveness. + LivePhysRegs LiveRegSet; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. + bool SeenUnknownBackEdge; + +public: + ExeDepsFix(const TargetRegisterClass *rc) + : MachineFunctionPass(ID), RC(rc), NumRegs(RC->getNumRegs()) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + const char *getPassName() const override { + return "Execution dependency fix"; + } + +private: + iterator_range<SmallVectorImpl<int>::const_iterator> + regIndices(unsigned Reg) const; + + // DomainValue allocation. + DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } + void release(DomainValue*); + DomainValue *resolve(DomainValue*&); + + // LiveRegs manipulations. + void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); + + void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + void visitInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); + void visitSoftInstr(MachineInstr*, unsigned mask); + void visitHardInstr(MachineInstr*, unsigned domain); + bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); + void processUndefReads(MachineBasicBlock*); +}; +} + +char ExeDepsFix::ID = 0; + +/// Translate TRI register number to a list of indices into our smaller tables +/// of interesting registers. +iterator_range<SmallVectorImpl<int>::const_iterator> +ExeDepsFix::regIndices(unsigned Reg) const { + assert(Reg < AliasMap.size() && "Invalid register"); + const auto &Entry = AliasMap[Reg]; + return make_range(Entry.begin(), Entry.end()); +} + +DomainValue *ExeDepsFix::alloc(int domain) { + DomainValue *dv = Avail.empty() ? + new(Allocator.Allocate()) DomainValue : + Avail.pop_back_val(); + if (domain >= 0) + dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); + return dv; +} + +/// Release a reference to DV. When the last reference is released, +/// collapse if needed. +void ExeDepsFix::release(DomainValue *DV) { + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} + +/// Follow the chain of dead DomainValues until a live DomainValue is reached. +/// Update the referenced pointer when necessary. +DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; +} + +/// Set LiveRegs[rx] = dv, updating reference counts. 
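+/// The previous value in the slot (if any) is released and the new value is
+/// retained, so each LiveRegs entry always owns exactly one reference.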
+void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(LiveRegs && "Must enter basic block first."); + + if (LiveRegs[rx].Value == dv) + return; + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); +} + +// Kill register rx, recycle or collapse any DomainValue. +void ExeDepsFix::kill(int rx) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; + + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = nullptr; +} + +/// Force register rx into domain. +void ExeDepsFix::force(int rx, unsigned domain) { + assert(unsigned(rx) < NumRegs && "Invalid index"); + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { + if (dv->isCollapsed()) + dv->addDomain(domain); + else if (dv->hasDomain(domain)) + collapse(dv, domain); + else { + // This is an incompatible open DomainValue. Collapse it to whatever and + // force the new value into domain. This costs a domain crossing. + collapse(dv, dv->getFirstDomain()); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); + } + } else { + // Set up basic collapsed DomainValue. + setLiveReg(rx, alloc(domain)); + } +} + +/// Collapse open DomainValue into given domain. If there are multiple +/// registers using dv, they each get a unique collapsed DomainValue. +void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { + assert(dv->hasDomain(domain) && "Cannot collapse"); + + // Collapse all the instructions. + while (!dv->Instrs.empty()) + TII->setExecutionDomain(dv->Instrs.pop_back_val(), domain); + dv->setSingleDomain(domain); + + // If there are multiple users, give them new, unique DomainValues. + if (LiveRegs && dv->Refs > 1) + for (unsigned rx = 0; rx != NumRegs; ++rx) + if (LiveRegs[rx].Value == dv) + setLiveReg(rx, alloc(domain)); +} + +/// All instructions and registers in B are moved to A, and B is released. +bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { + assert(!A->isCollapsed() && "Cannot merge into collapsed"); + assert(!B->isCollapsed() && "Cannot merge from collapsed"); + if (A == B) + return true; + // Restrict to the domains that A and B have in common. + unsigned common = A->getCommonDomains(B->AvailableDomains); + if (!common) + return false; + A->AvailableDomains = common; + A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->clear(); + // All uses of B are referred to A. + B->Next = retain(A); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + assert(LiveRegs && "no space allocated for live registers"); + if (LiveRegs[rx].Value == B) + setLiveReg(rx, A); + } + return true; +} + +/// Set up LiveRegs by merging predecessor live-out values. +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + SeenUnknownBackEdge = false; + + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up UndefReads to track undefined register reads. + UndefReads.clear(); + LiveRegSet.clear(); + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. 
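+  // A Def this far in the past gives every register far more clearance than
+  // targets request in practice, so no dependence is broken spuriously at the
+  // top of the block.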
+ for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = nullptr; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. + if (MBB->pred_empty()) { + for (const auto &LI : MBB->liveins()) { + for (int rx : regIndices(LI.PhysReg)) { + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) + continue; + if (!LiveRegs[rx].Value) { + setLiveReg(rx, pdv); + continue; + } + + // We have a live DomainValue from more than one predecessor. + if (LiveRegs[rx].Value->isCollapsed()) { + // We are already collapsed, but predecessor is not. Force it. + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); + continue; + } + + // Currently open, merge in predecessor. + if (!pdv->isCollapsed()) + merge(LiveRegs[rx].Value, pdv); + else + force(rx, pdv->getFirstDomain()); + } + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); +} + +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); + // Save live registers at end of MBB - used by enterBasicBlock(). + // Also use LiveOuts as a visited set to detect back-edges. + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i].Value); + delete[] LiveRegs; + } + LiveRegs = nullptr; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + + // Update instructions with explicit execution domains. + std::pair<uint16_t, uint16_t> DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); + else + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +/// \brief Return true to if it makes sense to break dependence on a partial def +/// or undef use. 
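+///
+/// "Clearance" is the distance, in instructions, from the operand's last def
+/// to the current instruction (CurInstr - LiveRegs[rx].Def). Breaking the
+/// dependence only pays off when the target prefers more clearance than is
+/// available; an unprocessed back-edge may still hide a closer def, in which
+/// case we conservatively wait for the second pass over loop blocks.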
+bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + unsigned reg = MI->getOperand(OpIdx).getReg(); + for (int rx : regIndices(reg)) { + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + continue; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK .\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + return false; + } + return true; +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +// +// Also break dependencies on partial defs and undef uses. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI); + if (Pref) { + if (shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + for (int rx : regIndices(MO.getReg())) { + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + + // How many instructions since rx was last written? + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } + } + ++CurInstr; +} + +/// \break Break false dependencies on undefined register reads. +/// +/// Walk the block backward computing precise liveness. This is expensive, so we +/// only do it on demand. Note that the occurrence of undefined register reads +/// that should be broken is very rare, but when they occur we may have many in +/// a single block. +void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; + + // Collect this block's live out register units. + LiveRegSet.init(TRI); + LiveRegSet.addLiveOuts(MBB); + + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; + + for (MachineInstr &I : make_range(MBB->rbegin(), MBB->rend())) { + // Update liveness, including the current instruction's defs. + LiveRegSet.stepBackward(I); + + if (UndefMI == &I) { + if (!LiveRegSet.contains(UndefMI->getOperand(OpIdx).getReg())) + TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } +} + +// A hard instruction only works in one domain. All input registers will be +// forced into that domain. 
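+// (For example, an SSE arithmetic instruction such as ADDPS has no equivalent
+// in the integer domain, unlike the logical and move instructions described in
+// the file header, which exist in several domains and are handled as "soft".)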
+void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { + // Collapse all uses. + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + for (int rx : regIndices(mo.getReg())) { + force(rx, domain); + } + } + + // Kill all defs and force them. + for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + for (int rx : regIndices(mo.getReg())) { + kill(rx); + force(rx, domain); + } + } +} + +// A soft instruction can be changed to work in other domains given by mask. +void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { + // Bitmask of available domains for this instruction after taking collapsed + // operands into account. + unsigned available = mask; + + // Scan the explicit use operands for incoming domains. + SmallVector<int, 4> used; + if (LiveRegs) + for (unsigned i = mi->getDesc().getNumDefs(), + e = mi->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &mo = mi->getOperand(i); + if (!mo.isReg()) continue; + for (int rx : regIndices(mo.getReg())) { + DomainValue *dv = LiveRegs[rx].Value; + if (dv == nullptr) + continue; + // Bitmask of domains that dv and available have in common. + unsigned common = dv->getCommonDomains(available); + // Is it possible to use this collapsed register for free? + if (dv->isCollapsed()) { + // Restrict available domains to the ones in common with the operand. + // If there are no common domains, we must pay the cross-domain + // penalty for this operand. + if (common) available = common; + } else if (common) + // Open DomainValue is compatible, save it for merging. + used.push_back(rx); + else + // Open DomainValue is not compatible with instruction. It is useless + // now. + kill(rx); + } + } + + // If the collapsed operands force a single domain, propagate the collapse. + if (isPowerOf2_32(available)) { + unsigned domain = countTrailingZeros(available); + TII->setExecutionDomain(mi, domain); + visitHardInstr(mi, domain); + return; + } + + // Kill off any remaining uses that don't match available, and build a list of + // incoming DomainValues that we want to merge. + SmallVector<LiveReg, 4> Regs; + for (SmallVectorImpl<int>::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + int rx = *i; + assert(LiveRegs && "no space allocated for live registers"); + const LiveReg &LR = LiveRegs[rx]; + // This useless DomainValue could have been missed above. + if (!LR.Value->getCommonDomains(available)) { + kill(rx); + continue; + } + // Sorted insertion. + bool Inserted = false; + for (SmallVectorImpl<LiveReg>::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); + } + } + if (!Inserted) + Regs.push_back(LR); + } + + // doms are now sorted in order of appearance. Try to merge them all, giving + // priority to the latest ones. + DomainValue *dv = nullptr; + while (!Regs.empty()) { + if (!dv) { + dv = Regs.pop_back_val().Value; + // Force the first dv to match the current instruction. + dv->AvailableDomains = dv->getCommonDomains(available); + assert(dv->AvailableDomains && "Domain should have been filtered"); + continue; + } + + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; + + // If latest didn't merge, it is useless now. 
Kill all registers using it. + for (int i : used) { + assert(LiveRegs && "no space allocated for live registers"); + if (LiveRegs[i].Value == Latest) + kill(i); + } + } + + // dv is the DomainValue we are going to use for this instruction. + if (!dv) { + dv = alloc(); + dv->AvailableDomains = available; + } + dv->Instrs.push_back(mi); + + // Finally set all defs and non-collapsed uses to dv. We must iterate through + // all the operators, including imp-def ones. + for (MachineInstr::mop_iterator ii = mi->operands_begin(), + ee = mi->operands_end(); + ii != ee; ++ii) { + MachineOperand &mo = *ii; + if (!mo.isReg()) continue; + for (int rx : regIndices(mo.getReg())) { + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { + kill(rx); + setLiveReg(rx, dv); + } + } + } +} + +bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + LiveRegs = nullptr; + assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << TRI->getRegClassName(RC) << " **********\n"); + + // If no relevant registers are used in the function, we can skip it + // completely. + bool anyregs = false; + const MachineRegisterInfo &MRI = mf.getRegInfo(); + for (unsigned Reg : *RC) { + if (MRI.isPhysRegUsed(Reg)) { + anyregs = true; + break; + } + } + if (!anyregs) return false; + + // Initialize the AliasMap on the first use. + if (AliasMap.empty()) { + // Given a PhysReg, AliasMap[PhysReg] returns a list of indices into RC and + // therefore the LiveRegs array. + AliasMap.resize(TRI->getNumRegs()); + for (unsigned i = 0, e = RC->getNumRegs(); i != e; ++i) + for (MCRegAliasIterator AI(RC->getRegister(i), TRI, true); + AI.isValid(); ++AI) + AliasMap[*AI].push_back(i); + } + + MachineBasicBlock *Entry = &*MF->begin(); + ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry); + SmallVector<MachineBasicBlock*, 16> Loops; + for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) + Loops.push_back(MBB); + for (MachineInstr &MI : *MBB) + visitInstr(&MI); + processUndefReads(MBB); + leaveBasicBlock(MBB); + } + + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. + for (MachineBasicBlock *MBB : Loops) { + enterBasicBlock(MBB); + for (MachineInstr &MI : *MBB) + if (!MI.isDebugValue()) + processDefs(&MI, false); + processUndefReads(MBB); + leaveBasicBlock(MBB); + } + + // Clear the LiveOuts vectors and collapse any remaining DomainValues. 
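+  // leaveBasicBlock() transferred ownership of each block's LiveReg array to
+  // LiveOuts on the first visit, so those arrays and the DomainValue
+  // references they hold still need to be released here.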
+ for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end() || !FI->second) + continue; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (FI->second[i].Value) + release(FI->second[i].Value); + delete[] FI->second; + } + LiveOuts.clear(); + UndefReads.clear(); + Avail.clear(); + Allocator.DestroyAll(); + + return false; +} + +FunctionPass * +llvm::createExecutionDependencyFixPass(const TargetRegisterClass *RC) { + return new ExeDepsFix(RC); +} diff --git a/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp new file mode 100644 index 0000000..90ddac9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExpandISelPseudos.cpp @@ -0,0 +1,75 @@ +//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Expand Pseudo-instructions produced by ISel. These are usually to allow +// the expansion to contain control flow, such as a conditional move +// implemented with a conditional branch and a phi, or an atomic operation +// implemented with a loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "expand-isel-pseudos" + +namespace { + class ExpandISelPseudos : public MachineFunctionPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandISelPseudos() : MachineFunctionPass(ID) {} + + private: + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + MachineFunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char ExpandISelPseudos::ID = 0; +char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID; +INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos", + "Expand ISel Pseudo-instructions", false, false) + +bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); + + // Iterate through each instruction in the function, looking for pseudos. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end(); + MBBI != MBBE; ) { + MachineInstr *MI = MBBI++; + + // If MI is a pseudo, expand it. + if (MI->usesCustomInsertionHook()) { + Changed = true; + MachineBasicBlock *NewMBB = + TLI->EmitInstrWithCustomInserter(MI, MBB); + // The expansion may involve new basic blocks. 
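+        // If the custom inserter left us in a different block, restart the
+        // scan from the top of that block.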
+ if (NewMBB != MBB) { + MBB = NewMBB; + I = NewMBB->getIterator(); + MBBI = NewMBB->begin(); + MBBE = NewMBB->end(); + } + } + } + } + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp new file mode 100644 index 0000000..e7bf143 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -0,0 +1,227 @@ +//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines a pass that expands COPY and SUBREG_TO_REG pseudo +// instructions after register allocation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "postrapseudos" + +namespace { +struct ExpandPostRA : public MachineFunctionPass { +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + +public: + static char ID; // Pass identification, replacement for typeid + ExpandPostRA() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// runOnMachineFunction - pass entry point + bool runOnMachineFunction(MachineFunction&) override; + +private: + bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); + + void TransferImplicitDefs(MachineInstr *MI); +}; +} // end anonymous namespace + +char ExpandPostRA::ID = 0; +char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; + +INITIALIZE_PASS(ExpandPostRA, "postrapseudos", + "Post-RA pseudo instruction expansion pass", false, false) + +/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. Copy any implicit-def +/// operands from MI to the replacement instruction. 
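+/// Implicit uses are not transferred; each copied operand is added to the
+/// replacement instruction as an implicit def.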
+void +ExpandPostRA::TransferImplicitDefs(MachineInstr *MI) { + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isImplicit() || MO.isUse()) + continue; + CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true)); + } +} + +bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && + MI->getOperand(1).isImm() && + (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && + MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned InsReg = MI->getOperand(2).getReg(); + assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?"); + unsigned SubIdx = MI->getOperand(3).getImm(); + + assert(SubIdx != 0 && "Invalid index for insert_subreg"); + unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); + + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Insert destination must be in a physical register"); + assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && + "Inserted value must be in a physical register"); + + DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); + + if (MI->allDefsAreDead()) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "subreg: replaced by: " << *MI); + return true; + } + + if (DstSubReg == InsReg) { + // No need to insert an identity copy instruction. + // Watch out for case like this: + // %RAX<def> = SUBREG_TO_REG 0, %EAX<kill>, 3 + // We must leave %RAX live. + if (DstReg != InsReg) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->RemoveOperand(3); // SubIdx + MI->RemoveOperand(1); // Imm + DEBUG(dbgs() << "subreg: replace by: " << *MI); + return true; + } + DEBUG(dbgs() << "subreg: eliminated!"); + } else { + TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, + MI->getOperand(2).isKill()); + + // Implicitly define DstReg for subsequent uses. + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + CopyMI->addRegisterDefined(DstReg); + DEBUG(dbgs() << "subreg: " << *CopyMI); + } + + DEBUG(dbgs() << '\n'); + MBB->erase(MI); + return true; +} + +bool ExpandPostRA::LowerCopy(MachineInstr *MI) { + + if (MI->allDefsAreDead()) { + DEBUG(dbgs() << "dead copy: " << *MI); + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + + MachineOperand &DstMO = MI->getOperand(0); + MachineOperand &SrcMO = MI->getOperand(1); + + if (SrcMO.getReg() == DstMO.getReg()) { + DEBUG(dbgs() << "identity copy: " << *MI); + // No need to insert an identity copy instruction, but replace with a KILL + // if liveness is changed. + if (SrcMO.isUndef() || MI->getNumOperands() > 2) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + // Vanilla identity copy. + MI->eraseFromParent(); + return true; + } + + DEBUG(dbgs() << "real copy: " << *MI); + TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), + DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); + + if (MI->getNumOperands() > 2) + TransferImplicitDefs(MI); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + dbgs() << "replaced by: " << *(--dMI); + }); + MI->eraseFromParent(); + return true; +} + +/// runOnMachineFunction - Reduce subregister inserts and extracts to register +/// copies. 
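+/// Targets get the first chance to expand any pseudo via expandPostRAPseudo();
+/// only the generic COPY and SUBREG_TO_REG pseudos are lowered directly here.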
+/// +bool ExpandPostRA::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "Machine Function\n" + << "********** EXPANDING POST-RA PSEUDO INSTRS **********\n" + << "********** Function: " << MF.getName() << '\n'); + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + bool MadeChange = false; + + for (MachineFunction::iterator mbbi = MF.begin(), mbbe = MF.end(); + mbbi != mbbe; ++mbbi) { + for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end(); + mi != me;) { + MachineInstr *MI = mi; + // Advance iterator here because MI may be erased. + ++mi; + + // Only expand pseudos. + if (!MI->isPseudo()) + continue; + + // Give targets a chance to expand even standard pseudos. + if (TII->expandPostRAPseudo(MI)) { + MadeChange = true; + continue; + } + + // Expand standard pseudos. + switch (MI->getOpcode()) { + case TargetOpcode::SUBREG_TO_REG: + MadeChange |= LowerSubregToReg(MI); + break; + case TargetOpcode::COPY: + MadeChange |= LowerCopy(MI); + break; + case TargetOpcode::DBG_VALUE: + continue; + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::EXTRACT_SUBREG: + llvm_unreachable("Sub-register pseudos should have been eliminated."); + } + } + } + + return MadeChange; +} diff --git a/contrib/llvm/lib/CodeGen/FaultMaps.cpp b/contrib/llvm/lib/CodeGen/FaultMaps.cpp new file mode 100644 index 0000000..2acafaf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/FaultMaps.cpp @@ -0,0 +1,150 @@ +//===---------------------------- FaultMaps.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FaultMaps.h" + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "faultmaps" + +static const int FaultMapVersion = 1; +const char *FaultMaps::WFMP = "Fault Maps: "; + +FaultMaps::FaultMaps(AsmPrinter &AP) : AP(AP) {} + +void FaultMaps::recordFaultingOp(FaultKind FaultTy, + const MCSymbol *HandlerLabel) { + MCContext &OutContext = AP.OutStreamer->getContext(); + MCSymbol *FaultingLabel = OutContext.createTempSymbol(); + + AP.OutStreamer->EmitLabel(FaultingLabel); + + const MCExpr *FaultingOffset = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(FaultingLabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); + + const MCExpr *HandlerOffset = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(HandlerLabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); + + FunctionInfos[AP.CurrentFnSym].emplace_back(FaultTy, FaultingOffset, + HandlerOffset); +} + +void FaultMaps::serializeToFaultMapSection() { + if (FunctionInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer->getContext(); + MCStreamer &OS = *AP.OutStreamer; + + // Create the section. + MCSection *FaultMapSection = + OutContext.getObjectFileInfo()->getFaultMapSection(); + OS.SwitchSection(FaultMapSection); + + // Emit a dummy symbol to force section inclusion. + OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_FaultMaps"))); + + DEBUG(dbgs() << "********** Fault Map Output **********\n"); + + // Header + OS.EmitIntValue(FaultMapVersion, 1); // Version. 
+ OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. + + DEBUG(dbgs() << WFMP << "#functions = " << FunctionInfos.size() << "\n"); + OS.EmitIntValue(FunctionInfos.size(), 4); + + DEBUG(dbgs() << WFMP << "functions:\n"); + + for (const auto &FFI : FunctionInfos) + emitFunctionInfo(FFI.first, FFI.second); +} + +void FaultMaps::emitFunctionInfo(const MCSymbol *FnLabel, + const FunctionFaultInfos &FFI) { + MCStreamer &OS = *AP.OutStreamer; + + DEBUG(dbgs() << WFMP << " function addr: " << *FnLabel << "\n"); + OS.EmitSymbolValue(FnLabel, 8); + + DEBUG(dbgs() << WFMP << " #faulting PCs: " << FFI.size() << "\n"); + OS.EmitIntValue(FFI.size(), 4); + + OS.EmitIntValue(0, 4); // Reserved + + for (auto &Fault : FFI) { + DEBUG(dbgs() << WFMP << " fault type: " + << faultTypeToString(Fault.Kind) << "\n"); + OS.EmitIntValue(Fault.Kind, 4); + + DEBUG(dbgs() << WFMP << " faulting PC offset: " + << *Fault.FaultingOffsetExpr << "\n"); + OS.EmitValue(Fault.FaultingOffsetExpr, 4); + + DEBUG(dbgs() << WFMP << " fault handler PC offset: " + << *Fault.HandlerOffsetExpr << "\n"); + OS.EmitValue(Fault.HandlerOffsetExpr, 4); + } +} + + +const char *FaultMaps::faultTypeToString(FaultMaps::FaultKind FT) { + switch (FT) { + default: + llvm_unreachable("unhandled fault type!"); + + case FaultMaps::FaultingLoad: + return "FaultingLoad"; + } +} + +raw_ostream &llvm:: +operator<<(raw_ostream &OS, + const FaultMapParser::FunctionFaultInfoAccessor &FFI) { + OS << "Fault kind: " + << FaultMaps::faultTypeToString((FaultMaps::FaultKind)FFI.getFaultKind()) + << ", faulting PC offset: " << FFI.getFaultingPCOffset() + << ", handling PC offset: " << FFI.getHandlerPCOffset(); + return OS; +} + +raw_ostream &llvm:: +operator<<(raw_ostream &OS, const FaultMapParser::FunctionInfoAccessor &FI) { + OS << "FunctionAddress: " << format_hex(FI.getFunctionAddr(), 8) + << ", NumFaultingPCs: " << FI.getNumFaultingPCs() << "\n"; + for (unsigned i = 0, e = FI.getNumFaultingPCs(); i != e; ++i) + OS << FI.getFunctionFaultInfoAt(i) << "\n"; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const FaultMapParser &FMP) { + OS << "Version: " << format_hex(FMP.getFaultMapVersion(), 2) << "\n"; + OS << "NumFunctions: " << FMP.getNumFunctions() << "\n"; + + if (FMP.getNumFunctions() == 0) + return OS; + + FaultMapParser::FunctionInfoAccessor FI; + + for (unsigned i = 0, e = FMP.getNumFunctions(); i != e; ++i) { + FI = (i == 0) ? FMP.getFirstFunctionInfo() : FI.getNextFunctionInfo(); + OS << FI; + } + + return OS; +} diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp new file mode 100644 index 0000000..8b2f505 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp @@ -0,0 +1,55 @@ +//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations which result in +// funclets being contiguous. 
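+// (A funclet is the contiguous group of blocks that implements an exception
+// handling construct such as a catch or cleanup handler, as used by
+// Windows-style exception handling.)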
+// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +using namespace llvm; + +#define DEBUG_TYPE "funclet-layout" + +namespace { +class FuncletLayout : public MachineFunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + FuncletLayout() : MachineFunctionPass(ID) { + initializeFuncletLayoutPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; +}; +} + +char FuncletLayout::ID = 0; +char &llvm::FuncletLayoutID = FuncletLayout::ID; +INITIALIZE_PASS(FuncletLayout, "funclet-layout", + "Contiguously Lay Out Funclets", false, false) + +bool FuncletLayout::runOnMachineFunction(MachineFunction &F) { + DenseMap<const MachineBasicBlock *, int> FuncletMembership = + getFuncletMembership(F); + if (FuncletMembership.empty()) + return false; + + F.sort([&](MachineBasicBlock &X, MachineBasicBlock &Y) { + auto FuncletX = FuncletMembership.find(&X); + auto FuncletY = FuncletMembership.find(&Y); + assert(FuncletX != FuncletMembership.end()); + assert(FuncletY != FuncletMembership.end()); + return FuncletX->second < FuncletY->second; + }); + + // Conservatively assume we changed something. + return true; +} diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp new file mode 100644 index 0000000..c8116a4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp @@ -0,0 +1,177 @@ +//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the GCFunctionInfo class and GCModuleInfo pass. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +namespace { + +class Printer : public FunctionPass { + static char ID; + raw_ostream &OS; + +public: + explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} + + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; +}; +} + +INITIALIZE_PASS(GCModuleInfo, "collector-metadata", + "Create Garbage Collector Module Metadata", false, false) + +// ----------------------------------------------------------------------------- + +GCFunctionInfo::GCFunctionInfo(const Function &F, GCStrategy &S) + : F(F), S(S), FrameSize(~0LL) {} + +GCFunctionInfo::~GCFunctionInfo() {} + +// ----------------------------------------------------------------------------- + +char GCModuleInfo::ID = 0; + +GCModuleInfo::GCModuleInfo() : ImmutablePass(ID) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) { + assert(!F.isDeclaration() && "Can only get GCFunctionInfo for a definition!"); + assert(F.hasGC()); + + finfo_map_type::iterator I = FInfoMap.find(&F); + if (I != FInfoMap.end()) + return *I->second; + + GCStrategy *S = getGCStrategy(F.getGC()); + Functions.push_back(make_unique<GCFunctionInfo>(F, *S)); + GCFunctionInfo *GFI = Functions.back().get(); + FInfoMap[&F] = GFI; + return *GFI; +} + +void GCModuleInfo::clear() { + Functions.clear(); + FInfoMap.clear(); + GCStrategyList.clear(); +} + +// ----------------------------------------------------------------------------- + +char Printer::ID = 0; + +FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) { + return new Printer(OS); +} + +const char *Printer::getPassName() const { + return "Print Garbage Collector Information"; +} + +void Printer::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<GCModuleInfo>(); +} + +static const char *DescKind(GC::PointKind Kind) { + switch (Kind) { + case GC::PreCall: + return "pre-call"; + case GC::PostCall: + return "post-call"; + } + llvm_unreachable("Invalid point kind"); +} + +bool Printer::runOnFunction(Function &F) { + if (F.hasGC()) + return false; + + GCFunctionInfo *FD = &getAnalysis<GCModuleInfo>().getFunctionInfo(F); + + OS << "GC roots for " << FD->getFunction().getName() << ":\n"; + for (GCFunctionInfo::roots_iterator RI = FD->roots_begin(), + RE = FD->roots_end(); + RI != RE; ++RI) + OS << "\t" << RI->Num << "\t" << RI->StackOffset << "[sp]\n"; + + OS << "GC safe points for " << FD->getFunction().getName() << ":\n"; + for (GCFunctionInfo::iterator PI = FD->begin(), PE = FD->end(); PI != PE; + ++PI) { + + OS << "\t" << PI->Label->getName() << ": " << DescKind(PI->Kind) + << ", live = {"; + + for (GCFunctionInfo::live_iterator RI = FD->live_begin(PI), + RE = FD->live_end(PI); + ;) { + OS << " " << RI->Num; + if (++RI == RE) + break; + OS << ","; + } + + OS << " }\n"; + } + + return false; +} + +bool Printer::doFinalization(Module &M) { + GCModuleInfo *GMI = 
getAnalysisIfAvailable<GCModuleInfo>(); + assert(GMI && "Printer didn't require GCModuleInfo?!"); + GMI->clear(); + return false; +} + +GCStrategy *GCModuleInfo::getGCStrategy(const StringRef Name) { + // TODO: Arguably, just doing a linear search would be faster for small N + auto NMI = GCStrategyMap.find(Name); + if (NMI != GCStrategyMap.end()) + return NMI->getValue(); + + for (auto& Entry : GCRegistry::entries()) { + if (Name == Entry.getName()) { + std::unique_ptr<GCStrategy> S = Entry.instantiate(); + S->Name = Name; + GCStrategyMap[Name] = S.get(); + GCStrategyList.push_back(std::move(S)); + return GCStrategyList.back().get(); + } + } + + if (GCRegistry::begin() == GCRegistry::end()) { + // In normal operation, the registry should not be empty. There should + // be the builtin GCs if nothing else. The most likely scenario here is + // that we got here without running the initializers used by the Registry + // itself and it's registration mechanism. + const std::string error = ("unsupported GC: " + Name).str() + + " (did you remember to link and initialize the CodeGen library?)"; + report_fatal_error(error); + } else + report_fatal_error(std::string("unsupported GC: ") + Name); +} diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp new file mode 100644 index 0000000..bb8cfa1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp @@ -0,0 +1,19 @@ +//===-- GCMetadataPrinter.cpp - Garbage collection infrastructure ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the abstract base class GCMetadataPrinter. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadataPrinter.h" +using namespace llvm; + +GCMetadataPrinter::GCMetadataPrinter() {} + +GCMetadataPrinter::~GCMetadataPrinter() {} diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp new file mode 100644 index 0000000..484d317 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp @@ -0,0 +1,356 @@ +//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lowering for the gc.root mechanism. 
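+//
+// A function using this mechanism looks roughly like the following sketch
+// (illustrative only; "mygc" stands for whatever registered GCStrategy the
+// front end selected):
+//
+//   define void @f(i8* %obj) gc "mygc" {
+//     %root = alloca i8*
+//     call void @llvm.gcroot(i8** %root, i8* null)
+//     store i8* %obj, i8** %root
+//     ...
+//   }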
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +/// LowerIntrinsics - This pass rewrites calls to the llvm.gcread or +/// llvm.gcwrite intrinsics, replacing them with simple loads and stores as +/// directed by the GCStrategy. It also performs automatic root initialization +/// and custom intrinsic lowering. +class LowerIntrinsics : public FunctionPass { + bool PerformDefaultLowering(Function &F, GCStrategy &Coll); + +public: + static char ID; + + LowerIntrinsics(); + const char *getPassName() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; +}; + +/// GCMachineCodeAnalysis - This is a target-independent pass over the machine +/// function representation to identify safe points for the garbage collector +/// in the machine code. It inserts labels at safe points and populates a +/// GCMetadata record for each function. +class GCMachineCodeAnalysis : public MachineFunctionPass { + GCFunctionInfo *FI; + MachineModuleInfo *MMI; + const TargetInstrInfo *TII; + + void FindSafePoints(MachineFunction &MF); + void VisitCallPoint(MachineBasicBlock::iterator MI); + MCSymbol *InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + DebugLoc DL) const; + + void FindStackOffsets(MachineFunction &MF); + +public: + static char ID; + + GCMachineCodeAnalysis(); + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} + +// ----------------------------------------------------------------------------- + +INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering", false, + false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false) + +FunctionPass *llvm::createGCLoweringPass() { return new LowerIntrinsics(); } + +char LowerIntrinsics::ID = 0; + +LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) { + initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry()); +} + +const char *LowerIntrinsics::getPassName() const { + return "Lower Garbage Collection Instructions"; +} + +void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const { + FunctionPass::getAnalysisUsage(AU); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<DominatorTreeWrapperPass>(); +} + +static bool NeedsDefaultLoweringPass(const GCStrategy &C) { + // Default lowering is necessary only if read or write barriers have a default + // action. The default for roots is no action. + return !C.customWriteBarrier() || !C.customReadBarrier() || + C.initializeRoots(); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. 
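+/// This instantiates a GCFunctionInfo (and with it the named GCStrategy) for
+/// every function definition in the module that specifies a collector.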
+bool LowerIntrinsics::doInitialization(Module &M) { + GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>(); + assert(MI && "LowerIntrinsics didn't require GCModuleInfo!?"); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + if (!I->isDeclaration() && I->hasGC()) + MI->getFunctionInfo(*I); // Instantiate the GC strategy. + + return false; +} + +/// CouldBecomeSafePoint - Predicate to conservatively determine whether the +/// instruction could introduce a safe point. +static bool CouldBecomeSafePoint(Instruction *I) { + // The natural definition of instructions which could introduce safe points + // are: + // + // - call, invoke (AfterCall, BeforeCall) + // - phis (Loops) + // - invoke, ret, unwind (Exit) + // + // However, instructions as seemingly inoccuous as arithmetic can become + // libcalls upon lowering (e.g., div i64 on a 32-bit platform), so instead + // it is necessary to take a conservative approach. + + if (isa<AllocaInst>(I) || isa<GetElementPtrInst>(I) || isa<StoreInst>(I) || + isa<LoadInst>(I)) + return false; + + // llvm.gcroot is safe because it doesn't do anything at runtime. + if (CallInst *CI = dyn_cast<CallInst>(I)) + if (Function *F = CI->getCalledFunction()) + if (Intrinsic::ID IID = F->getIntrinsicID()) + if (IID == Intrinsic::gcroot) + return false; + + return true; +} + +static bool InsertRootInitializers(Function &F, AllocaInst **Roots, + unsigned Count) { + // Scroll past alloca instructions. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + while (isa<AllocaInst>(IP)) + ++IP; + + // Search for initializers in the initial BB. + SmallPtrSet<AllocaInst *, 16> InitedRoots; + for (; !CouldBecomeSafePoint(&*IP); ++IP) + if (StoreInst *SI = dyn_cast<StoreInst>(IP)) + if (AllocaInst *AI = + dyn_cast<AllocaInst>(SI->getOperand(1)->stripPointerCasts())) + InitedRoots.insert(AI); + + // Add root initializers. + bool MadeChange = false; + + for (AllocaInst **I = Roots, **E = Roots + Count; I != E; ++I) + if (!InitedRoots.count(*I)) { + StoreInst *SI = new StoreInst( + ConstantPointerNull::get(cast<PointerType>( + cast<PointerType>((*I)->getType())->getElementType())), + *I); + SI->insertAfter(*I); + MadeChange = true; + } + + return MadeChange; +} + +/// runOnFunction - Replace gcread/gcwrite intrinsics with loads and stores. +/// Leave gcroot intrinsics; the code generator needs to see those. +bool LowerIntrinsics::runOnFunction(Function &F) { + // Quick exit for functions that do not use GC. + if (!F.hasGC()) + return false; + + GCFunctionInfo &FI = getAnalysis<GCModuleInfo>().getFunctionInfo(F); + GCStrategy &S = FI.getStrategy(); + + bool MadeChange = false; + + if (NeedsDefaultLoweringPass(S)) + MadeChange |= PerformDefaultLowering(F, S); + + return MadeChange; +} + +bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { + bool LowerWr = !S.customWriteBarrier(); + bool LowerRd = !S.customReadBarrier(); + bool InitRoots = S.initializeRoots(); + + SmallVector<AllocaInst *, 32> Roots; + + bool MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) { + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) { + Function *F = CI->getCalledFunction(); + switch (F->getIntrinsicID()) { + case Intrinsic::gcwrite: + if (LowerWr) { + // Replace a write barrier with a simple store. 
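+            // llvm.gcwrite(%value, %obj, %slot) becomes "store %value, %slot";
+            // the object operand is ignored by this default lowering.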
+ Value *St = + new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); + CI->replaceAllUsesWith(St); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcread: + if (LowerRd) { + // Replace a read barrier with a simple load. + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); + Ld->takeName(CI); + CI->replaceAllUsesWith(Ld); + CI->eraseFromParent(); + } + break; + case Intrinsic::gcroot: + if (InitRoots) { + // Initialize the GC root, but do not delete the intrinsic. The + // backend needs the intrinsic to flag the stack slot. + Roots.push_back( + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + } + break; + default: + continue; + } + + MadeChange = true; + } + } + } + + if (Roots.size()) + MadeChange |= InsertRootInitializers(F, Roots.begin(), Roots.size()); + + return MadeChange; +} + +// ----------------------------------------------------------------------------- + +char GCMachineCodeAnalysis::ID = 0; +char &llvm::GCMachineCodeAnalysisID = GCMachineCodeAnalysis::ID; + +INITIALIZE_PASS(GCMachineCodeAnalysis, "gc-analysis", + "Analyze Machine Code For Garbage Collection", false, false) + +GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {} + +void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); + AU.addRequired<GCModuleInfo>(); +} + +MCSymbol *GCMachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + MCSymbol *Label = MBB.getParent()->getContext().createTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +void GCMachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) { + // Find the return address (next instruction), too, so as to bracket the call + // instruction. + MachineBasicBlock::iterator RAI = CI; + ++RAI; + + if (FI->getStrategy().needsSafePoint(GC::PreCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc()); + FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc()); + } + + if (FI->getStrategy().needsSafePoint(GC::PostCall)) { + MCSymbol *Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc()); + FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc()); + } +} + +void GCMachineCodeAnalysis::FindSafePoints(MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + if (MI->isCall()) { + // Do not treat tail or sibling call sites as safe points. This is + // legal since any arguments passed to the callee which live in the + // remnants of the callers frame will be owned and updated by the + // callee if required. + if (MI->isTerminator()) + continue; + VisitCallPoint(MI); + } +} + +void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + assert(TFI && "TargetRegisterInfo not available!"); + + for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(); + RI != FI->roots_end();) { + // If the root references a dead object, no need to keep it. + if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) { + RI = FI->removeStackRoot(RI); + } else { + unsigned FrameReg; // FIXME: surely GCRoot ought to store the + // register that the offset is from? 
+ RI->StackOffset = TFI->getFrameIndexReference(MF, RI->Num, FrameReg); + ++RI; + } + } +} + +bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { + // Quick exit for functions that do not use GC. + if (!MF.getFunction()->hasGC()) + return false; + + FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(*MF.getFunction()); + MMI = &getAnalysis<MachineModuleInfo>(); + TII = MF.getSubtarget().getInstrInfo(); + + // Find the size of the stack frame. There may be no correct static frame + // size, we use UINT64_MAX to represent this. + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + const bool DynamicFrameSize = MFI->hasVarSizedObjects() || + RegInfo->needsStackRealignment(MF); + FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize()); + + // Find all safe points. + if (FI->getStrategy().needsSafePoints()) + FindSafePoints(MF); + + // Find the concrete stack offsets for all roots (stack slots) + FindStackOffsets(MF); + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp new file mode 100644 index 0000000..554d326 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp @@ -0,0 +1,22 @@ +//===-- GCStrategy.cpp - Garbage Collector Description --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the policy object GCStrategy which describes the +// behavior of a given garbage collector. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" + +using namespace llvm; + +GCStrategy::GCStrategy() + : UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false), + CustomWriteBarriers(false), CustomRoots(false), InitRoots(true), + UsesMetadata(false) {} diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp new file mode 100644 index 0000000..dd9a840 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp @@ -0,0 +1,593 @@ +//===-- GlobalMerge.cpp - Internal globals merging -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// This pass merges globals with internal linkage into one. This way all the +// globals which were merged into a biggest one can be addressed using offsets +// from the same base pointer (no need for separate base pointer for each of the +// global). Such a transformation can significantly reduce the register pressure +// when many globals are involved. 
+// +// For example, consider the code which touches several global variables at +// once: +// +// static int foo[N], bar[N], baz[N]; +// +// for (i = 0; i < N; ++i) { +// foo[i] = bar[i] * baz[i]; +// } +// +// On ARM the addresses of 3 arrays should be kept in the registers, thus +// this code has quite large register pressure (loop body): +// +// ldr r1, [r5], #4 +// ldr r2, [r6], #4 +// mul r1, r2, r1 +// str r1, [r0], #4 +// +// Pass converts the code to something like: +// +// static struct { +// int foo[N]; +// int bar[N]; +// int baz[N]; +// } merged; +// +// for (i = 0; i < N; ++i) { +// merged.foo[i] = merged.bar[i] * merged.baz[i]; +// } +// +// and in ARM code this becomes: +// +// ldr r0, [r5, #40] +// ldr r1, [r5, #80] +// mul r0, r1, r0 +// str r0, [r5], #4 +// +// note that we saved 2 registers here almostly "for free". +// +// However, merging globals can have tradeoffs: +// - it confuses debuggers, tools, and users +// - it makes linker optimizations less useful (order files, LOHs, ...) +// - it forces usage of indexed addressing (which isn't necessarily "free") +// - it can increase register pressure when the uses are disparate enough. +// +// We use heuristics to discover the best global grouping we can (cf cl::opts). +// ===---------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "global-merge" + +// FIXME: This is only useful as a last-resort way to disable the pass. +static cl::opt<bool> +EnableGlobalMerge("enable-global-merge", cl::Hidden, + cl::desc("Enable the global merge pass"), + cl::init(true)); + +static cl::opt<bool> GlobalMergeGroupByUse( + "global-merge-group-by-use", cl::Hidden, + cl::desc("Improve global merge pass to look at uses"), cl::init(true)); + +static cl::opt<bool> GlobalMergeIgnoreSingleUse( + "global-merge-ignore-single-use", cl::Hidden, + cl::desc("Improve global merge pass to ignore globals only used alone"), + cl::init(true)); + +static cl::opt<bool> +EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, + cl::desc("Enable global merge pass on constants"), + cl::init(false)); + +// FIXME: this could be a transitional option, and we probably need to remove +// it if only we are sure this optimization could always benefit all targets. 
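+// Using cl::boolOrDefault here lets createGlobalMergePass() tell "not set on
+// the command line" (BOU_UNSET) apart from an explicit true/false, so the
+// default requested by the target can still take effect.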
+static cl::opt<cl::boolOrDefault> +EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, + cl::desc("Enable global merge pass on external linkage")); + +STATISTIC(NumMerged, "Number of globals merged"); +namespace { + class GlobalMerge : public FunctionPass { + const TargetMachine *TM; + // FIXME: Infer the maximum possible offset depending on the actual users + // (these max offsets are different for the users inside Thumb or ARM + // functions), see the code that passes in the offset in the ARM backend + // for more information. + unsigned MaxOffset; + + /// Whether we should try to optimize for size only. + /// Currently, this applies a dead simple heuristic: only consider globals + /// used in minsize functions for merging. + /// FIXME: This could learn about optsize, and be used in the cost model. + bool OnlyOptimizeForSize; + + /// Whether we should merge global variables that have external linkage. + bool MergeExternalGlobals; + + bool doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool isConst, unsigned AddrSpace) const; + /// \brief Merge everything in \p Globals for which the corresponding bit + /// in \p GlobalSet is set. + bool doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const; + + /// \brief Check if the given variable has been identified as must keep + /// \pre setMustKeepGlobalVariables must have been called on the Module that + /// contains GV + bool isMustKeepGlobalVariable(const GlobalVariable *GV) const { + return MustKeepGlobalVariables.count(GV); + } + + /// Collect every variables marked as "used" or used in a landing pad + /// instruction for this Module. + void setMustKeepGlobalVariables(Module &M); + + /// Collect every variables marked as "used" + void collectUsedGlobalVariables(Module &M); + + /// Keep track of the GlobalVariable that must not be merged away + SmallPtrSet<const GlobalVariable *, 16> MustKeepGlobalVariables; + + public: + static char ID; // Pass identification, replacement for typeid. + explicit GlobalMerge(const TargetMachine *TM = nullptr, + unsigned MaximalOffset = 0, + bool OnlyOptimizeForSize = false, + bool MergeExternalGlobals = false) + : FunctionPass(ID), TM(TM), MaxOffset(MaximalOffset), + OnlyOptimizeForSize(OnlyOptimizeForSize), + MergeExternalGlobals(MergeExternalGlobals) { + initializeGlobalMergePass(*PassRegistry::getPassRegistry()); + } + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + bool doFinalization(Module &M) override; + + const char *getPassName() const override { + return "Merge internal globals"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + FunctionPass::getAnalysisUsage(AU); + } + }; +} // end anonymous namespace + +char GlobalMerge::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalMerge, "global-merge", "Merge global variables", + false, false) +INITIALIZE_PASS_END(GlobalMerge, "global-merge", "Merge global variables", + false, false) + +bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, + Module &M, bool isConst, unsigned AddrSpace) const { + auto &DL = M.getDataLayout(); + // FIXME: Find better heuristics + std::stable_sort(Globals.begin(), Globals.end(), + [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) { + return DL.getTypeAllocSize(GV1->getValueType()) < + DL.getTypeAllocSize(GV2->getValueType()); + }); + + // If we want to just blindly group all globals together, do so. 
+ if (!GlobalMergeGroupByUse) {
+ BitVector AllGlobals(Globals.size());
+ AllGlobals.set();
+ return doMerge(Globals, AllGlobals, M, isConst, AddrSpace);
+ }
+
+ // If we want to be smarter, look at all uses of each global, to try to
+ // discover all sets of globals used together, and how many times each of
+ // these sets occurred.
+ //
+ // Keep this reasonably efficient by having an append-only list of all sets
+ // discovered so far (UsedGlobalSet), and mapping each "together-ness" unit of
+ // code (currently, a Function) to the set of globals seen so far that are
+ // used together in that unit (GlobalUsesByFunction).
+ //
+ // When we look at the Nth global, we know that any new set is either:
+ // - the singleton set {N}, containing this global only, or
+ // - the union of {N} and a previously-discovered set, containing some
+ // combination of the previous N-1 globals.
+ // Using that knowledge, when looking at the Nth global, we can keep:
+ // - a reference to the singleton set {N} (CurGVOnlySetIdx)
+ // - a list mapping each previous set to its union with {N} (EncounteredUGS),
+ // if it actually occurs.
+
+ // We keep track of the sets of globals used together "close enough".
+ struct UsedGlobalSet {
+ UsedGlobalSet(size_t Size) : Globals(Size), UsageCount(1) {}
+ BitVector Globals;
+ unsigned UsageCount;
+ };
+
+ // Each set is unique in UsedGlobalSets.
+ std::vector<UsedGlobalSet> UsedGlobalSets;
+
+ // Avoid repeating the create-global-set pattern.
+ auto CreateGlobalSet = [&]() -> UsedGlobalSet & {
+ UsedGlobalSets.emplace_back(Globals.size());
+ return UsedGlobalSets.back();
+ };
+
+ // The first set is the empty set.
+ CreateGlobalSet().UsageCount = 0;
+
+ // We define "close enough" to be "in the same function".
+ // FIXME: Grouping uses by function is way too aggressive, so we should have
+ // a better metric for distance between uses.
+ // The obvious alternative would be to group by BasicBlock, but that's in
+ // turn too conservative.
+ // Anything in between wouldn't be trivial to compute, so just stick with
+ // per-function grouping.
+
+ // The value type is an index into UsedGlobalSets.
+ // The default (0) conveniently points to the empty set.
+ DenseMap<Function *, size_t /*UsedGlobalSetIdx*/> GlobalUsesByFunction;
+
+ // Now, look at each merge-eligible global in turn.
+
+ // Keep track of the sets we already encountered to which we added the
+ // current global.
+ // Each element matches the same-index element in UsedGlobalSets.
+ // This lets us efficiently tell whether a set has already been expanded to
+ // include the current global.
+ std::vector<size_t> EncounteredUGS;
+
+ for (size_t GI = 0, GE = Globals.size(); GI != GE; ++GI) {
+ GlobalVariable *GV = Globals[GI];
+
+ // Reset the encountered sets for this global...
+ std::fill(EncounteredUGS.begin(), EncounteredUGS.end(), 0);
+ // ...and grow it in case we created new sets for the previous global.
+ EncounteredUGS.resize(UsedGlobalSets.size());
+
+ // We might need to create a set that only consists of the current global.
+ // Keep track of its index into UsedGlobalSets.
+ size_t CurGVOnlySetIdx = 0;
+
+ // For each global, look at all its Uses.
+ for (auto &U : GV->uses()) {
+ // This Use might be a ConstantExpr. We're interested in Instruction
+ // users, so look through ConstantExpr...
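+ // For a ConstantExpr user we walk the ConstantExpr's own use list
+ // (UE == nullptr means "until the end of that list"); for a direct
+ // instruction use we visit just this single Use (UE = UI->getNext()).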
+ Use *UI, *UE; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(U.getUser())) { + if (CE->use_empty()) + continue; + UI = &*CE->use_begin(); + UE = nullptr; + } else if (isa<Instruction>(U.getUser())) { + UI = &U; + UE = UI->getNext(); + } else { + continue; + } + + // ...to iterate on all the instruction users of the global. + // Note that we iterate on Uses and not on Users to be able to getNext(). + for (; UI != UE; UI = UI->getNext()) { + Instruction *I = dyn_cast<Instruction>(UI->getUser()); + if (!I) + continue; + + Function *ParentFn = I->getParent()->getParent(); + + // If we're only optimizing for size, ignore non-minsize functions. + if (OnlyOptimizeForSize && !ParentFn->optForMinSize()) + continue; + + size_t UGSIdx = GlobalUsesByFunction[ParentFn]; + + // If this is the first global the basic block uses, map it to the set + // consisting of this global only. + if (!UGSIdx) { + // If that set doesn't exist yet, create it. + if (!CurGVOnlySetIdx) { + CurGVOnlySetIdx = UsedGlobalSets.size(); + CreateGlobalSet().Globals.set(GI); + } else { + ++UsedGlobalSets[CurGVOnlySetIdx].UsageCount; + } + + GlobalUsesByFunction[ParentFn] = CurGVOnlySetIdx; + continue; + } + + // If we already encountered this BB, just increment the counter. + if (UsedGlobalSets[UGSIdx].Globals.test(GI)) { + ++UsedGlobalSets[UGSIdx].UsageCount; + continue; + } + + // If not, the previous set wasn't actually used in this function. + --UsedGlobalSets[UGSIdx].UsageCount; + + // If we already expanded the previous set to include this global, just + // reuse that expanded set. + if (size_t ExpandedIdx = EncounteredUGS[UGSIdx]) { + ++UsedGlobalSets[ExpandedIdx].UsageCount; + GlobalUsesByFunction[ParentFn] = ExpandedIdx; + continue; + } + + // If not, create a new set consisting of the union of the previous set + // and this global. Mark it as encountered, so we can reuse it later. + GlobalUsesByFunction[ParentFn] = EncounteredUGS[UGSIdx] = + UsedGlobalSets.size(); + + UsedGlobalSet &NewUGS = CreateGlobalSet(); + NewUGS.Globals.set(GI); + NewUGS.Globals |= UsedGlobalSets[UGSIdx].Globals; + } + } + } + + // Now we found a bunch of sets of globals used together. We accumulated + // the number of times we encountered the sets (i.e., the number of blocks + // that use that exact set of globals). + // + // Multiply that by the size of the set to give us a crude profitability + // metric. + std::sort(UsedGlobalSets.begin(), UsedGlobalSets.end(), + [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) { + return UGS1.Globals.count() * UGS1.UsageCount < + UGS2.Globals.count() * UGS2.UsageCount; + }); + + // We can choose to merge all globals together, but ignore globals never used + // with another global. This catches the obviously non-profitable cases of + // having a single global, but is aggressive enough for any other case. + if (GlobalMergeIgnoreSingleUse) { + BitVector AllGlobals(Globals.size()); + for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { + const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + if (UGS.UsageCount == 0) + continue; + if (UGS.Globals.count() > 1) + AllGlobals |= UGS.Globals; + } + return doMerge(Globals, AllGlobals, M, isConst, AddrSpace); + } + + // Starting from the sets with the best (=biggest) profitability, find a + // good combination. + // The ideal (and expensive) solution can only be found by trying all + // combinations, looking for the one with the best profitability. 
+ // Don't be smart about it, and just pick the first compatible combination, + // starting with the sets with the best profitability. + BitVector PickedGlobals(Globals.size()); + bool Changed = false; + + for (size_t i = 0, e = UsedGlobalSets.size(); i != e; ++i) { + const UsedGlobalSet &UGS = UsedGlobalSets[e - i - 1]; + if (UGS.UsageCount == 0) + continue; + if (PickedGlobals.anyCommon(UGS.Globals)) + continue; + PickedGlobals |= UGS.Globals; + // If the set only contains one global, there's no point in merging. + // Ignore the global for inclusion in other sets though, so keep it in + // PickedGlobals. + if (UGS.Globals.count() < 2) + continue; + Changed |= doMerge(Globals, UGS.Globals, M, isConst, AddrSpace); + } + + return Changed; +} + +bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals, + const BitVector &GlobalSet, Module &M, bool isConst, + unsigned AddrSpace) const { + assert(Globals.size() > 1); + + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + auto &DL = M.getDataLayout(); + + DEBUG(dbgs() << " Trying to merge set, starts with #" + << GlobalSet.find_first() << "\n"); + + ssize_t i = GlobalSet.find_first(); + while (i != -1) { + ssize_t j = 0; + uint64_t MergedSize = 0; + std::vector<Type*> Tys; + std::vector<Constant*> Inits; + + for (j = i; j != -1; j = GlobalSet.find_next(j)) { + Type *Ty = Globals[j]->getValueType(); + MergedSize += DL.getTypeAllocSize(Ty); + if (MergedSize > MaxOffset) { + break; + } + Tys.push_back(Ty); + Inits.push_back(Globals[j]->getInitializer()); + } + + StructType *MergedTy = StructType::get(M.getContext(), Tys); + Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); + + GlobalVariable *MergedGV = new GlobalVariable( + M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit, + "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + + for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + + Constant *Idx[2] = { + ConstantInt::get(Int32Ty, 0), + ConstantInt::get(Int32Ty, idx), + }; + Constant *GEP = + ConstantExpr::getInBoundsGetElementPtr(MergedTy, MergedGV, Idx); + Globals[k]->replaceAllUsesWith(GEP); + Globals[k]->eraseFromParent(); + + // When the linkage is not internal we must emit an alias for the original + // variable name as it may be accessed from another object. On non-Mach-O + // we can also emit an alias for internal linkage as it's safe to do so. + // It's not safe on Mach-O as the alias (and thus the portion of the + // MergedGlobals variable) may be dead stripped at link time. + if (Linkage != GlobalValue::InternalLinkage || + !TM->getTargetTriple().isOSBinFormatMachO()) { + GlobalAlias::create(Tys[idx], AddrSpace, Linkage, Name, GEP, &M); + } + + NumMerged++; + } + i = j; + } + + return true; +} + +void GlobalMerge::collectUsedGlobalVariables(Module &M) { + // Extract global variables from llvm.used array + const GlobalVariable *GV = M.getGlobalVariable("llvm.used"); + if (!GV || !GV->hasInitializer()) return; + + // Should be an array of 'i8*'. 
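+ // llvm.used is an appending-linkage array of i8* whose elements are
+ // (possibly bitcast) pointers to values that must be treated as referenced;
+ // any global variable listed there is exempted from merging.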
+ const ConstantArray *InitList = cast<ConstantArray>(GV->getInitializer()); + + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) + if (const GlobalVariable *G = + dyn_cast<GlobalVariable>(InitList->getOperand(i)->stripPointerCasts())) + MustKeepGlobalVariables.insert(G); +} + +void GlobalMerge::setMustKeepGlobalVariables(Module &M) { + collectUsedGlobalVariables(M); + + for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn; + ++IFn) { + for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end(); + IBB != IEndBB; ++IBB) { + // Follow the invoke link to find the landing pad instruction + const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator()); + if (!II) continue; + + const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst(); + // Look for globals in the clauses of the landing pad instruction + for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses(); + Idx != NumClauses; ++Idx) + if (const GlobalVariable *GV = + dyn_cast<GlobalVariable>(LPInst->getClause(Idx) + ->stripPointerCasts())) + MustKeepGlobalVariables.insert(GV); + } + } +} + +bool GlobalMerge::doInitialization(Module &M) { + if (!EnableGlobalMerge) + return false; + + auto &DL = M.getDataLayout(); + DenseMap<unsigned, SmallVector<GlobalVariable*, 16> > Globals, ConstGlobals, + BSSGlobals; + bool Changed = false; + setMustKeepGlobalVariables(M); + + // Grab all non-const globals. + for (auto &GV : M.globals()) { + // Merge is safe for "normal" internal or external globals only + if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection()) + continue; + + if (!(MergeExternalGlobals && GV.hasExternalLinkage()) && + !GV.hasInternalLinkage()) + continue; + + PointerType *PT = dyn_cast<PointerType>(GV.getType()); + assert(PT && "Global variable is not a pointer!"); + + unsigned AddressSpace = PT->getAddressSpace(); + + // Ignore fancy-aligned globals for now. + unsigned Alignment = DL.getPreferredAlignment(&GV); + Type *Ty = GV.getValueType(); + if (Alignment > DL.getABITypeAlignment(Ty)) + continue; + + // Ignore all 'special' globals. + if (GV.getName().startswith("llvm.") || + GV.getName().startswith(".llvm.")) + continue; + + // Ignore all "required" globals: + if (isMustKeepGlobalVariable(&GV)) + continue; + + if (DL.getTypeAllocSize(Ty) < MaxOffset) { + if (TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSSLocal()) + BSSGlobals[AddressSpace].push_back(&GV); + else if (GV.isConstant()) + ConstGlobals[AddressSpace].push_back(&GV); + else + Globals[AddressSpace].push_back(&GV); + } + } + + for (auto &P : Globals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); + + for (auto &P : BSSGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, false, P.first); + + if (EnableGlobalMergeOnConst) + for (auto &P : ConstGlobals) + if (P.second.size() > 1) + Changed |= doMerge(P.second, M, true, P.first); + + return Changed; +} + +bool GlobalMerge::runOnFunction(Function &F) { + return false; +} + +bool GlobalMerge::doFinalization(Module &M) { + MustKeepGlobalVariables.clear(); + return false; +} + +Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset, + bool OnlyOptimizeForSize, + bool MergeExternalByDefault) { + bool MergeExternal = (EnableGlobalMergeOnExternal == cl::BOU_UNSET) ? 
+ MergeExternalByDefault : (EnableGlobalMergeOnExternal == cl::BOU_TRUE); + return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal); +} diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp new file mode 100644 index 0000000..c38c9d2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -0,0 +1,1772 @@ +//===-- IfConversion.cpp - Machine code if conversion pass. ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine instruction level if-conversion pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "BranchFolding.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "ifcvt" + +// Hidden options for help debugging. 
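+// ifcvt-fn-start/ifcvt-fn-stop restrict the pass to a range of function
+// numbers and ifcvt-limit caps the number of conversions performed, which
+// makes it possible to bisect a miscompile down to a single if-conversion.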
+static cl::opt<int> IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); +static cl::opt<int> IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); +static cl::opt<int> IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); +static cl::opt<bool> DisableSimple("disable-ifcvt-simple", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableSimpleF("disable-ifcvt-simple-false", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangle("disable-ifcvt-triangle", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleR("disable-ifcvt-triangle-rev", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleF("disable-ifcvt-triangle-false", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev", + cl::init(false), cl::Hidden); +static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond", + cl::init(false), cl::Hidden); +static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold", + cl::init(true), cl::Hidden); + +STATISTIC(NumSimple, "Number of simple if-conversions performed"); +STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); +STATISTIC(NumTriangle, "Number of triangle if-conversions performed"); +STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed"); +STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed"); +STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed"); +STATISTIC(NumDiamonds, "Number of diamond if-conversions performed"); +STATISTIC(NumIfConvBBs, "Number of if-converted blocks"); +STATISTIC(NumDupBBs, "Number of duplicated blocks"); +STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated"); + +namespace { + class IfConverter : public MachineFunctionPass { + enum IfcvtKind { + ICNotClassfied, // BB data valid, but not classified. + ICSimpleFalse, // Same as ICSimple, but on the false path. + ICSimple, // BB is entry of an one split, no rejoin sub-CFG. + ICTriangleFRev, // Same as ICTriangleFalse, but false path rev condition. + ICTriangleRev, // Same as ICTriangle, but true path rev condition. + ICTriangleFalse, // Same as ICTriangle, but on the false path. + ICTriangle, // BB is entry of a triangle sub-CFG. + ICDiamond // BB is entry of a diamond sub-CFG. + }; + + /// BBInfo - One per MachineBasicBlock, this is used to cache the result + /// if-conversion feasibility analysis. This includes results from + /// TargetInstrInfo::AnalyzeBranch() (i.e. TBB, FBB, and Cond), and its + /// classification, and common tail block of its successors (if it's a + /// diamond shape), its size, whether it's predicable, and whether any + /// instruction can clobber the 'would-be' predicate. + /// + /// IsDone - True if BB is not to be considered for ifcvt. + /// IsBeingAnalyzed - True if BB is currently being analyzed. + /// IsAnalyzed - True if BB has been analyzed (info is still valid). + /// IsEnqueued - True if BB has been enqueued to be ifcvt'ed. + /// IsBrAnalyzable - True if AnalyzeBranch() returns false. + /// HasFallThrough - True if BB may fallthrough to the following BB. + /// IsUnpredicable - True if BB is known to be unpredicable. + /// ClobbersPred - True if BB could modify predicates (e.g. has + /// cmp, call, etc.) + /// NonPredSize - Number of non-predicated instructions. + /// ExtraCost - Extra cost for multi-cycle instructions. + /// ExtraCost2 - Some instructions are slower when predicated + /// BB - Corresponding MachineBasicBlock. + /// TrueBB / FalseBB- See AnalyzeBranch(). 
+ /// BrCond - Conditions for end of block conditional branches. + /// Predicate - Predicate used in the BB. + struct BBInfo { + bool IsDone : 1; + bool IsBeingAnalyzed : 1; + bool IsAnalyzed : 1; + bool IsEnqueued : 1; + bool IsBrAnalyzable : 1; + bool HasFallThrough : 1; + bool IsUnpredicable : 1; + bool CannotBeCopied : 1; + bool ClobbersPred : 1; + unsigned NonPredSize; + unsigned ExtraCost; + unsigned ExtraCost2; + MachineBasicBlock *BB; + MachineBasicBlock *TrueBB; + MachineBasicBlock *FalseBB; + SmallVector<MachineOperand, 4> BrCond; + SmallVector<MachineOperand, 4> Predicate; + BBInfo() : IsDone(false), IsBeingAnalyzed(false), + IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false), + HasFallThrough(false), IsUnpredicable(false), + CannotBeCopied(false), ClobbersPred(false), NonPredSize(0), + ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr), + FalseBB(nullptr) {} + }; + + /// IfcvtToken - Record information about pending if-conversions to attempt: + /// BBI - Corresponding BBInfo. + /// Kind - Type of block. See IfcvtKind. + /// NeedSubsumption - True if the to-be-predicated BB has already been + /// predicated. + /// NumDups - Number of instructions that would be duplicated due + /// to this if-conversion. (For diamonds, the number of + /// identical instructions at the beginnings of both + /// paths). + /// NumDups2 - For diamonds, the number of identical instructions + /// at the ends of both paths. + struct IfcvtToken { + BBInfo &BBI; + IfcvtKind Kind; + bool NeedSubsumption; + unsigned NumDups; + unsigned NumDups2; + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0) + : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} + }; + + /// BBAnalysis - Results of if-conversion feasibility analysis indexed by + /// basic block number. 
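+ /// The vector is sized from getNumBlockIDs() after RenumberBlocks(), so
+ /// MachineBasicBlock::getNumber() is used directly as the index.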
+ std::vector<BBInfo> BBAnalysis; + TargetSchedModel SchedModel; + + const TargetLoweringBase *TLI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineBlockFrequencyInfo *MBFI; + const MachineBranchProbabilityInfo *MBPI; + MachineRegisterInfo *MRI; + + LivePhysRegs Redefs; + LivePhysRegs DontKill; + + bool PreRegAlloc; + bool MadeChange; + int FnNum; + std::function<bool(const Function &)> PredicateFtor; + + public: + static char ID; + IfConverter(std::function<bool(const Function &)> Ftor = nullptr) + : MachineFunctionPass(ID), FnNum(-1), PredicateFtor(Ftor) { + initializeIfConverterPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + bool ReverseBranchCondition(BBInfo &BBI); + bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + BranchProbability Prediction) const; + bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups, + BranchProbability Prediction) const; + bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const; + void ScanInstructions(BBInfo &BBI); + void AnalyzeBlock(MachineBasicBlock *MBB, std::vector<IfcvtToken*> &Tokens); + bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond, + bool isTriangle = false, bool RevBranch = false); + void AnalyzeBlocks(MachineFunction &MF, std::vector<IfcvtToken*> &Tokens); + void InvalidatePreds(MachineBasicBlock *BB); + void RemoveExtraEdges(BBInfo &BBI); + bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind); + bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2); + void PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> *LaterRedefs = nullptr); + void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + bool IgnoreBr = false); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); + + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, + unsigned Cycle, unsigned Extra, + BranchProbability Prediction) const { + return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra, + Prediction); + } + + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, + unsigned TCycle, unsigned TExtra, + MachineBasicBlock &FBB, + unsigned FCycle, unsigned FExtra, + BranchProbability Prediction) const { + return TCycle > 0 && FCycle > 0 && + TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra, + Prediction); + } + + // blockAlwaysFallThrough - Block ends without a terminator. + bool blockAlwaysFallThrough(BBInfo &BBI) const { + return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr; + } + + // IfcvtTokenCmp - Used to sort if-conversion candidates. + static bool IfcvtTokenCmp(IfcvtToken *C1, IfcvtToken *C2) { + int Incr1 = (C1->Kind == ICDiamond) + ? -(int)(C1->NumDups + C1->NumDups2) : (int)C1->NumDups; + int Incr2 = (C2->Kind == ICDiamond) + ? -(int)(C2->NumDups + C2->NumDups2) : (int)C2->NumDups; + if (Incr1 > Incr2) + return true; + else if (Incr1 == Incr2) { + // Favors subsumption. + if (!C1->NeedSubsumption && C2->NeedSubsumption) + return true; + else if (C1->NeedSubsumption == C2->NeedSubsumption) { + // Favors diamond over triangle, etc. 
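+ // Candidates are popped off the back of the sorted vector, so entries
+ // this comparator places later are the ones attempted first.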
+ if ((unsigned)C1->Kind < (unsigned)C2->Kind) + return true; + else if (C1->Kind == C2->Kind) + return C1->BBI.BB->getNumber() < C2->BBI.BB->getNumber(); + } + } + return false; + } + }; + + char IfConverter::ID = 0; +} + +char &llvm::IfConverterID = IfConverter::ID; + +INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false) + +bool IfConverter::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + + const TargetSubtargetInfo &ST = MF.getSubtarget(); + TLI = ST.getTargetLowering(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MRI = &MF.getRegInfo(); + SchedModel.init(ST.getSchedModel(), &ST, TII); + + if (!TII) return false; + + PreRegAlloc = MRI->isSSA(); + + bool BFChange = false; + if (!PreRegAlloc) { + // Tail merge tend to expose more if-conversion opportunities. + BranchFolder BF(true, false, *MBFI, *MBPI); + BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" + << MF.getName() << "\'"); + + if (FnNum < IfCvtFnStart || (IfCvtFnStop != -1 && FnNum > IfCvtFnStop)) { + DEBUG(dbgs() << " skipped\n"); + return false; + } + DEBUG(dbgs() << "\n"); + + MF.RenumberBlocks(); + BBAnalysis.resize(MF.getNumBlockIDs()); + + std::vector<IfcvtToken*> Tokens; + MadeChange = false; + unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + + NumTriangleRev + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { + // Do an initial analysis for each basic block and find all the potential + // candidates to perform if-conversion. + bool Change = false; + AnalyzeBlocks(MF, Tokens); + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + BBInfo &BBI = Token->BBI; + IfcvtKind Kind = Token->Kind; + unsigned NumDups = Token->NumDups; + unsigned NumDups2 = Token->NumDups2; + + delete Token; + + // If the block has been evicted out of the queue or it has already been + // marked dead (due to it being predicated), then skip it. + if (BBI.IsDone) + BBI.IsEnqueued = false; + if (!BBI.IsEnqueued) + continue; + + BBI.IsEnqueued = false; + + bool RetVal = false; + switch (Kind) { + default: llvm_unreachable("Unexpected!"); + case ICSimple: + case ICSimpleFalse: { + bool isFalse = Kind == ICSimpleFalse; + if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? + " false" : "") + << "): BB#" << BBI.BB->getNumber() << " (" + << ((Kind == ICSimpleFalse) + ? BBI.FalseBB->getNumber() + : BBI.TrueBB->getNumber()) << ") "); + RetVal = IfConvertSimple(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" 
: "failed!") << "\n"); + if (RetVal) { + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; + } + break; + } + case ICTriangle: + case ICTriangleRev: + case ICTriangleFalse: + case ICTriangleFRev: { + bool isFalse = Kind == ICTriangleFalse; + bool isRev = (Kind == ICTriangleRev || Kind == ICTriangleFRev); + if (DisableTriangle && !isFalse && !isRev) break; + if (DisableTriangleR && !isFalse && isRev) break; + if (DisableTriangleF && isFalse && !isRev) break; + if (DisableTriangleFR && isFalse && isRev) break; + DEBUG(dbgs() << "Ifcvt (Triangle"); + if (isFalse) + DEBUG(dbgs() << " false"); + if (isRev) + DEBUG(dbgs() << " rev"); + DEBUG(dbgs() << "): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertTriangle(BBI, Kind); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) { + if (isFalse) { + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; + } else { + if (isRev) ++NumTriangleRev; + else ++NumTriangle; + } + } + break; + } + case ICDiamond: { + if (DisableDiamond) break; + DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:" + << BBI.TrueBB->getNumber() << ",F:" + << BBI.FalseBB->getNumber() << ") "); + RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); + if (RetVal) ++NumDiamonds; + break; + } + } + + Change |= RetVal; + + NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + NumTriangleRev + + NumTriangleFalse + NumTriangleFRev + NumDiamonds; + if (IfCvtLimit != -1 && (int)NumIfCvts >= IfCvtLimit) + break; + } + + if (!Change) + break; + MadeChange |= Change; + } + + // Delete tokens in case of early exit. + while (!Tokens.empty()) { + IfcvtToken *Token = Tokens.back(); + Tokens.pop_back(); + delete Token; + } + + Tokens.clear(); + BBAnalysis.clear(); + + if (MadeChange && IfCvtBranchFold) { + BranchFolder BF(false, false, *MBFI, *MBPI); + BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(), + getAnalysisIfAvailable<MachineModuleInfo>()); + } + + MadeChange |= BFChange; + return MadeChange; +} + +/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given +/// its 'true' successor. +static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, + MachineBasicBlock *TrueBB) { + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + E = BB->succ_end(); SI != E; ++SI) { + MachineBasicBlock *SuccBB = *SI; + if (SuccBB != TrueBB) + return SuccBB; + } + return nullptr; +} + +/// ReverseBranchCondition - Reverse the condition of the end of the block +/// branch. Swap block's 'true' and 'false' successors. +bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere + if (!TII->ReverseBranchCondition(BBI.BrCond)) { + TII->RemoveBranch(*BBI.BB); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); + std::swap(BBI.TrueBB, BBI.FalseBB); + return true; + } + return false; +} + +/// getNextBlock - Returns the next block in the function blocks ordering. If +/// it is the end, returns NULL. +static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) { + MachineFunction::iterator I = BB->getIterator(); + MachineFunction::iterator E = BB->getParent()->end(); + if (++I == E) + return nullptr; + return &*I; +} + +/// ValidSimple - Returns true if the 'true' block (along with its +/// predecessor) forms a valid simple shape for ifcvt. 
It also returns the +/// number of instructions that the ifcvt would need to duplicate if performed +/// in Dups. +bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups, + BranchProbability Prediction) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.IsBrAnalyzable) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied || + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize, + Prediction)) + return false; + Dups = TrueBBI.NonPredSize; + } + + return true; +} + +/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid triangle shape for ifcvt. +/// If 'FalseBranch' is true, it checks if 'true' block's false branch +/// branches to the 'false' block rather than the other way around. It also +/// returns the number of instructions that the ifcvt would need to duplicate +/// if performed in 'Dups'. +bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, + bool FalseBranch, unsigned &Dups, + BranchProbability Prediction) const { + Dups = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone) + return false; + + if (TrueBBI.BB->pred_size() > 1) { + if (TrueBBI.CannotBeCopied) + return false; + + unsigned Size = TrueBBI.NonPredSize; + if (TrueBBI.IsBrAnalyzable) { + if (TrueBBI.TrueBB && TrueBBI.BrCond.empty()) + // Ends with an unconditional branch. It will be removed. + --Size; + else { + MachineBasicBlock *FExit = FalseBranch + ? TrueBBI.TrueBB : TrueBBI.FalseBB; + if (FExit) + // Require a conditional branch + ++Size; + } + } + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction)) + return false; + Dups = Size; + } + + MachineBasicBlock *TExit = FalseBranch ? TrueBBI.FalseBB : TrueBBI.TrueBB; + if (!TExit && blockAlwaysFallThrough(TrueBBI)) { + MachineFunction::iterator I = TrueBBI.BB->getIterator(); + if (++I == TrueBBI.BB->getParent()->end()) + return false; + TExit = &*I; + } + return TExit && TExit == FalseBBI.BB; +} + +/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along +/// with their common predecessor) forms a valid diamond shape for ifcvt. +bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, + unsigned &Dups1, unsigned &Dups2) const { + Dups1 = Dups2 = 0; + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone || + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone) + return false; + + MachineBasicBlock *TT = TrueBBI.TrueBB; + MachineBasicBlock *FT = FalseBBI.TrueBB; + + if (!TT && blockAlwaysFallThrough(TrueBBI)) + TT = getNextBlock(TrueBBI.BB); + if (!FT && blockAlwaysFallThrough(FalseBBI)) + FT = getNextBlock(FalseBBI.BB); + if (TT != FT) + return false; + if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable)) + return false; + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) + return false; + + // FIXME: Allow true block to have an early exit? + if (TrueBBI.FalseBB || FalseBBI.FalseBB || + (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)) + return false; + + // Count duplicate instructions at the beginning of the true and false blocks. + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + while (TIB != TIE && FIB != FIE) { + // Skip dbg_value instructions. These do not count. 
+ if (TIB->isDebugValue()) { + while (TIB != TIE && TIB->isDebugValue()) + ++TIB; + if (TIB == TIE) + break; + } + if (FIB->isDebugValue()) { + while (FIB != FIE && FIB->isDebugValue()) + ++FIB; + if (FIB == FIE) + break; + } + if (!TIB->isIdenticalTo(FIB)) + break; + ++Dups1; + ++TIB; + ++FIB; + } + + // Now, in preparation for counting duplicate instructions at the ends of the + // blocks, move the end iterators up past any branch instructions. + while (TIE != TIB) { + --TIE; + if (!TIE->isBranch()) + break; + } + while (FIE != FIB) { + --FIE; + if (!FIE->isBranch()) + break; + } + + // If Dups1 includes all of a block, then don't count duplicate + // instructions at the end of the blocks. + if (TIB == TIE || FIB == FIE) + return true; + + // Count duplicate instructions at the ends of the blocks. + while (TIE != TIB && FIE != FIB) { + // Skip dbg_value instructions. These do not count. + if (TIE->isDebugValue()) { + while (TIE != TIB && TIE->isDebugValue()) + --TIE; + if (TIE == TIB) + break; + } + if (FIE->isDebugValue()) { + while (FIE != FIB && FIE->isDebugValue()) + --FIE; + if (FIE == FIB) + break; + } + if (!TIE->isIdenticalTo(FIE)) + break; + ++Dups2; + --TIE; + --FIE; + } + + return true; +} + +/// ScanInstructions - Scan all the instructions in the block to determine if +/// the block is predicable. In most cases, that means all the instructions +/// in the block are isPredicable(). Also checks if the block contains any +/// instruction which can clobber a predicate (e.g. condition code register). +/// If so, the block is not predicable unless it's the last instruction. +void IfConverter::ScanInstructions(BBInfo &BBI) { + if (BBI.IsDone) + return; + + bool AlreadyPredicated = !BBI.Predicate.empty(); + // First analyze the end of BB branches. + BBI.TrueBB = BBI.FalseBB = nullptr; + BBI.BrCond.clear(); + BBI.IsBrAnalyzable = + !TII->AnalyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond); + BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr; + + if (BBI.BrCond.size()) { + // No false branch. This BB must end with a conditional branch and a + // fallthrough. + if (!BBI.FalseBB) + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + if (!BBI.FalseBB) { + // Malformed bcc? True and false blocks are the same? + BBI.IsUnpredicable = true; + return; + } + } + + // Then scan all the instructions. + BBI.NonPredSize = 0; + BBI.ExtraCost = 0; + BBI.ExtraCost2 = 0; + BBI.ClobbersPred = false; + for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); + I != E; ++I) { + if (I->isDebugValue()) + continue; + + if (I->isNotDuplicable()) + BBI.CannotBeCopied = true; + + bool isPredicated = TII->isPredicated(I); + bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); + + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; + } + + if (BBI.ClobbersPred && !isPredicated) { + // Predicate modification instruction should end the block (except for + // already predicated instructions and end of block branches). 
+ // Predicate may have been modified, the subsequent (currently) + // unpredicated instructions cannot be correctly predicated. + BBI.IsUnpredicable = true; + return; + } + + // FIXME: Make use of PredDefs? e.g. ADDC, SUBC sets predicates but are + // still potentially predicable. + std::vector<MachineOperand> PredDefs; + if (TII->DefinesPredicate(I, PredDefs)) + BBI.ClobbersPred = true; + + if (!TII->isPredicable(I)) { + BBI.IsUnpredicable = true; + return; + } + } +} + +/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be +/// predicated by the specified predicate. +bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, + SmallVectorImpl<MachineOperand> &Pred, + bool isTriangle, bool RevBranch) { + // If the block is dead or unpredicable, then it cannot be predicated. + if (BBI.IsDone || BBI.IsUnpredicable) + return false; + + // If it is already predicated but we couldn't analyze its terminator, the + // latter might fallthrough, but we can't determine where to. + // Conservatively avoid if-converting again. + if (BBI.Predicate.size() && !BBI.IsBrAnalyzable) + return false; + + // If it is already predicated, check if the new predicate subsumes + // its predicate. + if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) + return false; + + if (BBI.BrCond.size()) { + if (!isTriangle) + return false; + + // Test predicate subsumption. + SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end()); + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (RevBranch) { + if (TII->ReverseBranchCondition(Cond)) + return false; + } + if (TII->ReverseBranchCondition(RevPred) || + !TII->SubsumesPredicate(Cond, RevPred)) + return false; + } + + return true; +} + +/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from +/// the specified block. Record its successors and whether it looks like an +/// if-conversion candidate. +void IfConverter::AnalyzeBlock(MachineBasicBlock *MBB, + std::vector<IfcvtToken*> &Tokens) { + struct BBState { + BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {} + MachineBasicBlock *MBB; + + /// This flag is true if MBB's successors have been analyzed. + bool SuccsAnalyzed; + }; + + // Push MBB to the stack. + SmallVector<BBState, 16> BBStack(1, MBB); + + while (!BBStack.empty()) { + BBState &State = BBStack.back(); + MachineBasicBlock *BB = State.MBB; + BBInfo &BBI = BBAnalysis[BB->getNumber()]; + + if (!State.SuccsAnalyzed) { + if (BBI.IsAnalyzed || BBI.IsBeingAnalyzed) { + BBStack.pop_back(); + continue; + } + + BBI.BB = BB; + BBI.IsBeingAnalyzed = true; + + ScanInstructions(BBI); + + // Unanalyzable or ends with fallthrough or unconditional branch, or if is + // not considered for ifcvt anymore. + if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + BBStack.pop_back(); + continue; + } + + // Do not ifcvt if either path is a back edge to the entry block. + if (BBI.TrueBB == BB || BBI.FalseBB == BB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + BBStack.pop_back(); + continue; + } + + // Do not ifcvt if true and false fallthrough blocks are the same. + if (!BBI.FalseBB) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + BBStack.pop_back(); + continue; + } + + // Push the False and True blocks to the stack. 
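+ // The explicit BBStack plus the SuccsAnalyzed flag replaces a native
+ // recursion over successors; this block is revisited (below) once both
+ // of its successors have been analyzed.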
+ State.SuccsAnalyzed = true; + BBStack.push_back(BBI.FalseBB); + BBStack.push_back(BBI.TrueBB); + continue; + } + + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + + if (TrueBBI.IsDone && FalseBBI.IsDone) { + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + BBStack.pop_back(); + continue; + } + + SmallVector<MachineOperand, 4> + RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + bool CanRevCond = !TII->ReverseBranchCondition(RevCond); + + unsigned Dups = 0; + unsigned Dups2 = 0; + bool TNeedSub = !TrueBBI.Predicate.empty(); + bool FNeedSub = !FalseBBI.Predicate.empty(); + bool Enqueued = false; + + BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB); + + if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && + MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) + + TrueBBI.ExtraCost), TrueBBI.ExtraCost2, + *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) + + FalseBBI.ExtraCost),FalseBBI.ExtraCost2, + Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + // Diamond: + // EBB + // / \_ + // | | + // TBB FBB + // \ / + // TailBB + // Note TailBB can be empty. + Tokens.push_back(new IfcvtToken(BBI, ICDiamond, TNeedSub|FNeedSub, Dups, + Dups2)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { + // Triangle: + // EBB + // | \_ + // | | + // | TBB + // | / + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(TrueBBI, Dups, Prediction) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost, + TrueBBI.ExtraCost2, Prediction) && + FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { + // Simple (split, no rejoin): + // EBB + // | \_ + // | | + // | TBB---> exit + // | + // FBB + Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); + Enqueued = true; + } + + if (CanRevCond) { + // Try the other path... 
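+ // The triangle/simple checks are repeated with the roles of the true and
+ // false successors swapped, predicating on the reversed condition
+ // (RevCond) and using the complemented edge probability.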
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups, + Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidTriangle(FalseBBI, TrueBBI, true, Dups, + Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { + Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); + Enqueued = true; + } + + if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) && + MeetIfcvtSizeLimit(*FalseBBI.BB, + FalseBBI.NonPredSize + FalseBBI.ExtraCost, + FalseBBI.ExtraCost2, Prediction.getCompl()) && + FeasibilityAnalysis(FalseBBI, RevCond)) { + Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); + Enqueued = true; + } + } + + BBI.IsEnqueued = Enqueued; + BBI.IsBeingAnalyzed = false; + BBI.IsAnalyzed = true; + BBStack.pop_back(); + } +} + +/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion +/// candidates. +void IfConverter::AnalyzeBlocks(MachineFunction &MF, + std::vector<IfcvtToken*> &Tokens) { + for (auto &BB : MF) + AnalyzeBlock(&BB, Tokens); + + // Sort to favor more complex ifcvt scheme. + std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); +} + +/// canFallThroughTo - Returns true either if ToBB is the next block after BB or +/// that all the intervening blocks are empty (given BB can fall through to its +/// next block). +static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { + MachineFunction::iterator PI = BB->getIterator(); + MachineFunction::iterator I = std::next(PI); + MachineFunction::iterator TI = ToBB->getIterator(); + MachineFunction::iterator E = BB->getParent()->end(); + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(&*I)) + return false; + PI = I++; + } + return true; +} + +/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed +/// to determine if it can be if-converted. If predecessor is already enqueued, +/// dequeue it! +void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { + for (const auto &Predecessor : BB->predecessors()) { + BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()]; + if (PBBI.IsDone || PBBI.BB == BB) + continue; + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } +} + +/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB. +/// +static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, + const TargetInstrInfo *TII) { + DebugLoc dl; // FIXME: this is nowhere + SmallVector<MachineOperand, 0> NoCond; + TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl); +} + +/// RemoveExtraEdges - Remove true / false edges if either / both are no longer +/// successors. +void IfConverter::RemoveExtraEdges(BBInfo &BBI) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BBI.BB, TBB, FBB, Cond)) + BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); +} + +/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all +/// values defined in MI which are not live/used by MI. 
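+/// Once MI is predicated its defs become conditional, so the values those
+/// registers held before MI may still be live afterwards; the implicit
+/// operands added here keep liveness information consistent with that.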
+static void UpdatePredRedefs(MachineInstr *MI, LivePhysRegs &Redefs) { + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers; + Redefs.stepForward(*MI, Clobbers); + + // Now add the implicit uses for each of the clobbered values. + for (auto Reg : Clobbers) { + // FIXME: Const cast here is nasty, but better than making StepForward + // take a mutable instruction instead of const. + MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second); + MachineInstr *OpMI = Op.getParent(); + MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI); + if (Op.isRegMask()) { + // First handle regmasks. They clobber any entries in the mask which + // means that we need a def for those registers. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + + // We also need to add an implicit def of this register for the later + // use to read from. + // For the register allocator to have allocated a register clobbered + // by the call which is used later, it must be the case that + // the call doesn't return. + MIB.addReg(Reg.first, RegState::Implicit | RegState::Define); + continue; + } + assert(Op.isReg() && "Register operand required"); + if (Op.isDead()) { + // If we found a dead def, but it needs to be live, then remove the dead + // flag. + if (Redefs.contains(Op.getReg())) + Op.setIsDead(false); + } + MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef); + } +} + +/** + * Remove kill flags from operands with a registers in the @p DontKill set. + */ +static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) { + for (MIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->isKill()) + continue; + if (DontKill.contains(O->getReg())) + O->setIsKill(false); + } +} + +/** + * Walks a range of machine instructions and removes kill flags for registers + * in the @p DontKill set. + */ +static void RemoveKills(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const LivePhysRegs &DontKill, + const MCRegisterInfo &MCRI) { + for ( ; I != E; ++I) + RemoveKills(*I, DontKill); +} + +/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. +/// +bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICSimpleFalse) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + + if (Kind == ICSimpleFalse) + if (TII->ReverseBranchCondition(Cond)) + llvm_unreachable("Unable to reverse branch condition!"); + + // Initialize liveins to the first BB. These are potentiall redefined by + // predicated instructions. + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); + + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. 
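+ // ("BB1" is the block being predicated, CvtBBI->BB, and "BB2" is the
+ // surviving successor, NextBBI->BB.)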
+ DontKill.init(TRI); + DontKill.addLiveIns(NextBBI->BB); + + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB, true); + } else { + RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + + // Merge converted block into entry block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI); + } + + bool IterIfcvt = true; + if (!canFallThroughTo(BBI.BB, NextBBI->BB)) { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + // Now ifcvt'd block will look like this: + // BB: + // ... + // t, f = cmp + // if t op + // b BBf + // + // We cannot further ifcvt this block because the unconditional branch + // will have to be predicated on the new condition, that will not be + // available if cmp executes. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertTriangle - If convert a triangle sub-CFG. +/// +bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + BBInfo *CvtBBI = &TrueBBI; + BBInfo *NextBBI = &FalseBBI; + DebugLoc dl; // FIXME: this is nowhere + + SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + std::swap(CvtBBI, NextBBI); + + if (CvtBBI->IsDone || + (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) { + // Something has changed. It's no longer safe to predicate this block. + BBI.IsAnalyzed = false; + CvtBBI->IsAnalyzed = false; + return false; + } + + if (CvtBBI->BB->hasAddressTaken()) + // Conservatively abort if-conversion if BB's address is taken. + return false; + + if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) + if (TII->ReverseBranchCondition(Cond)) + llvm_unreachable("Unable to reverse branch condition!"); + + if (Kind == ICTriangleRev || Kind == ICTriangleFRev) { + if (ReverseBranchCondition(*CvtBBI)) { + // BB has been changed, modify its predecessors (except for this + // one) so they don't get ifcvt'ed based on bad intel. + for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(), + E = CvtBBI->BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PBB = *PI; + if (PBB == BBI.BB) + continue; + BBInfo &PBBI = BBAnalysis[PBB->getNumber()]; + if (PBBI.IsEnqueued) { + PBBI.IsAnalyzed = false; + PBBI.IsEnqueued = false; + } + } + } + } + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB); + Redefs.addLiveIns(NextBBI->BB); + + DontKill.clear(); + + bool HasEarlyExit = CvtBBI->FalseBB != nullptr; + BranchProbability CvtNext, CvtFalse, BBNext, BBCvt; + + if (HasEarlyExit) { + // Get probabilities before modifying CvtBBI->BB and BBI.BB. 
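+    // They feed the NewNext/NewFalse re-weighting below once CvtBBI has been
+    // folded into the entry block.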
+ CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB); + CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB); + BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB); + BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB); + } + + if (CvtBBI->BB->pred_size() > 1) { + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Copy instructions in the true block, predicate them, and add them to + // the entry block. + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, so + // explicitly remove CvtBBI as a successor. + BBI.BB->removeSuccessor(CvtBBI->BB, true); + } else { + // Predicate the 'true' block after removing its branch. + CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + + // Now merge the entry of the triangle with the true block. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + MergeBlocks(BBI, *CvtBBI, false); + } + + // If 'true' block has a 'false' successor, add an exit branch to it. + if (HasEarlyExit) { + SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(), + CvtBBI->BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + llvm_unreachable("Unable to reverse branch condition!"); + + // Update the edge probability for both CvtBBI->FalseBB and NextBBI. + // NewNext = New_Prob(BBI.BB, NextBBI->BB) = + // Prob(BBI.BB, NextBBI->BB) + + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB) + // NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) = + // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB) + auto NewTrueBB = getNextBlock(BBI.BB); + auto NewNext = BBNext + BBCvt * CvtNext; + auto NewTrueBBIter = + std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB); + if (NewTrueBBIter != BBI.BB->succ_end()) + BBI.BB->setSuccProbability(NewTrueBBIter, NewNext); + + auto NewFalse = BBCvt * CvtFalse; + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl); + BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse); + } + + // Merge in the 'false' block if the 'false' block has no other + // predecessors. Otherwise, add an unconditional branch to 'false'. + bool FalseBBDead = false; + bool IterIfcvt = true; + bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB); + if (!isFallThrough) { + // Only merge them if the true block does not fallthrough to the false + // block. By not merging them, we make it possible to iteratively + // ifcvt the blocks. + if (!HasEarlyExit && + NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough && + !NextBBI->BB->hasAddressTaken()) { + MergeBlocks(BBI, *NextBBI); + FalseBBDead = true; + } else { + InsertUncondBranch(BBI.BB, NextBBI->BB, TII); + BBI.HasFallThrough = false; + } + // Mixed predicated and unpredicated code. This cannot be iteratively + // predicated. + IterIfcvt = false; + } + + RemoveExtraEdges(BBI); + + // Update block info. BB can be iteratively if-converted. + if (!IterIfcvt) + BBI.IsDone = true; + InvalidatePreds(BBI.BB); + CvtBBI->IsDone = true; + if (FalseBBDead) + NextBBI->IsDone = true; + + // FIXME: Must maintain LiveIns. + return true; +} + +/// IfConvertDiamond - If convert a diamond sub-CFG. +/// +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, + unsigned NumDups1, unsigned NumDups2) { + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()]; + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; + MachineBasicBlock *TailBB = TrueBBI.TrueBB; + // True block must fall through or end with an unanalyzable terminator. 
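+  // The shape being converted is (illustrative sketch):
+  //
+  //        EBB
+  //       /   \
+  //  TrueBB   FalseBB
+  //       \   /
+  //      TailBB
+  //
+  // TailBB is null when the true block has no analyzable branch target, i.e.
+  // it falls through or ends in an unanalyzable terminator; in the
+  // fall-through case the tail can be recovered from the false block instead.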
+ if (!TailBB) { + if (blockAlwaysFallThrough(TrueBBI)) + TailBB = FalseBBI.TrueBB; + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!"); + } + + if (TrueBBI.IsDone || FalseBBI.IsDone || + TrueBBI.BB->pred_size() > 1 || + FalseBBI.BB->pred_size() > 1) { + // Something has changed. It's no longer safe to predicate these blocks. + BBI.IsAnalyzed = false; + TrueBBI.IsAnalyzed = false; + FalseBBI.IsAnalyzed = false; + return false; + } + + if (TrueBBI.BB->hasAddressTaken() || FalseBBI.BB->hasAddressTaken()) + // Conservatively abort if-conversion if either BB has its address taken. + return false; + + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. + BBInfo *BBI1 = &TrueBBI; + BBInfo *BBI2 = &FalseBBI; + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); + if (TII->ReverseBranchCondition(RevCond)) + llvm_unreachable("Unable to reverse branch condition!"); + SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond; + SmallVector<MachineOperand, 4> *Cond2 = &RevCond; + + // Figure out the more profitable ordering. + bool DoSwap = false; + if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred) + DoSwap = true; + else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) { + if (TrueBBI.NonPredSize > FalseBBI.NonPredSize) + DoSwap = true; + } + if (DoSwap) { + std::swap(BBI1, BBI2); + std::swap(Cond1, Cond2); + } + + // Remove the conditional branch from entry to the blocks. + BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + Redefs.init(TRI); + Redefs.addLiveIns(BBI1->BB); + + // Remove the duplicated instructions at the beginnings of both paths. + // Skip dbg_value instructions + MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr(); + MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr(); + BBI1->NonPredSize -= NumDups1; + BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } + while (NumDups1 != 0) { + ++DI2; + if (!DI2->isDebugValue()) + --NumDups1; + } + + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers; + Redefs.stepForward(*I, IgnoredClobbers); + } + BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); + BBI2->BB->erase(BBI2->BB->begin(), DI2); + + // Remove branch from 'true' block and remove duplicated instructions. + BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); + DI1 = BBI1->BB->end(); + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. 
+ assert (DI1 != BBI1->BB->begin()); + --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } + BBI1->BB->erase(DI1, BBI1->BB->end()); + + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + + // Remove 'false' block branch and find the last instruction to predicate. + BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); + DI2 = BBI2->BB->end(); + while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI2 != BBI2->BB->begin()); + --DI2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; + } + + // Remember which registers would later be defined by the false block. + // This allows us not to predicate instructions in the true block that would + // later be re-defined. That is, rather than + // subeq r0, r1, #1 + // addne r0, r1, #1 + // generate: + // sub r0, r1, #1 + // addne r0, r1, #1 + SmallSet<unsigned, 4> RedefsByFalse; + SmallSet<unsigned, 4> ExtUses; + if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) { + for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) { + if (FI->isDebugValue()) + continue; + SmallVector<unsigned, 4> Defs; + for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = FI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) { + Defs.push_back(Reg); + } else if (!RedefsByFalse.count(Reg)) { + // These are defined before ctrl flow reach the 'false' instructions. + // They cannot be modified by the 'true' instructions. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + ExtUses.insert(*SubRegs); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (!ExtUses.count(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + RedefsByFalse.insert(*SubRegs); + } + } + } + } + + // Predicate the 'true' block. + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); + + // Predicate the 'false' block. + PredicateBlock(*BBI2, DI2, *Cond2); + + // Merge the true block into the entry of the diamond. + MergeBlocks(BBI, *BBI1, TailBB == nullptr); + MergeBlocks(BBI, *BBI2, TailBB == nullptr); + + // If the if-converted block falls through or unconditionally branches into + // the tail block, and the tail block does not have other predecessors, then + // fold the tail block in as well. Otherwise, unless it falls through to the + // tail, add a unconditional branch to it. + if (TailBB) { + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; + bool CanMergeTail = !TailBBI.HasFallThrough && + !TailBBI.BB->hasAddressTaken(); + // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; + // check if there are any other predecessors besides those. 
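+    // A tail with any other predecessor cannot simply be folded in, since its
+    // code would then also run on paths that never went through the diamond.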
+ unsigned NumPreds = TailBB->pred_size(); + if (NumPreds > 1) + CanMergeTail = false; + else if (NumPreds == 1 && CanMergeTail) { + MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); + if (*PI != BBI1->BB && *PI != BBI2->BB) + CanMergeTail = false; + } + if (CanMergeTail) { + MergeBlocks(BBI, TailBBI); + TailBBI.IsDone = true; + } else { + BBI.BB->addSuccessor(TailBB, BranchProbability::getOne()); + InsertUncondBranch(BBI.BB, TailBB, TII); + BBI.HasFallThrough = false; + } + } + + // RemoveExtraEdges won't work if the block has an unanalyzable branch, + // which can happen here if TailBB is unanalyzable and is merged, so + // explicitly remove BBI1 and BBI2 as successors. + BBI.BB->removeSuccessor(BBI1->BB); + BBI.BB->removeSuccessor(BBI2->BB, true); + RemoveExtraEdges(BBI); + + // Update block info. + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true; + InvalidatePreds(BBI.BB); + + // FIXME: Must maintain LiveIns. + return true; +} + +static bool MaySpeculate(const MachineInstr *MI, + SmallSet<unsigned, 4> &LaterRedefs) { + bool SawStore = true; + if (!MI->isSafeToMove(nullptr, SawStore)) + return false; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef() && !LaterRedefs.count(Reg)) + return false; + } + + return true; +} + +/// PredicateBlock - Predicate instructions from the start of the block to the +/// specified end with the specified condition. +void IfConverter::PredicateBlock(BBInfo &BBI, + MachineBasicBlock::iterator E, + SmallVectorImpl<MachineOperand> &Cond, + SmallSet<unsigned, 4> *LaterRedefs) { + bool AnyUnpred = false; + bool MaySpec = LaterRedefs != nullptr; + for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { + if (I->isDebugValue() || TII->isPredicated(I)) + continue; + // It may be possible not to predicate an instruction if it's the 'true' + // side of a diamond and the 'false' side may re-define the instruction's + // defs. + if (MaySpec && MaySpeculate(I, *LaterRedefs)) { + AnyUnpred = true; + continue; + } + // If any instruction is predicated, then every instruction after it must + // be predicated. + MaySpec = false; + if (!TII->PredicateInstruction(I, Cond)) { +#ifndef NDEBUG + dbgs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(nullptr); + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs); + } + + BBI.Predicate.append(Cond.begin(), Cond.end()); + + BBI.IsAnalyzed = false; + BBI.NonPredSize = 0; + + ++NumIfConvBBs; + if (AnyUnpred) + ++NumUnpred; +} + +/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to +/// the destination block. Skip end of block branches if IgnoreBr is true. +void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, + SmallVectorImpl<MachineOperand> &Cond, + bool IgnoreBr) { + MachineFunction &MF = *ToBBI.BB->getParent(); + + for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), + E = FromBBI.BB->end(); I != E; ++I) { + // Do not copy the end of the block branches. 
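+    // (Only when IgnoreBr is set; otherwise the terminators are cloned and
+    // predicated like any other instruction.)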
+ if (IgnoreBr && I->isBranch()) + break; + + MachineInstr *MI = MF.CloneMachineInstr(I); + ToBBI.BB->insert(ToBBI.BB->end(), MI); + ToBBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + ToBBI.ExtraCost += NumCycles-1; + ToBBI.ExtraCost2 += ExtraPredCost; + + if (!TII->isPredicated(I) && !MI->isDebugValue()) { + if (!TII->PredicateInstruction(MI, Cond)) { +#ifndef NDEBUG + dbgs() << "Unable to predicate " << *I << "!\n"; +#endif + llvm_unreachable(nullptr); + } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs); + + // Some kill flags may not be correct anymore. + if (!DontKill.empty()) + RemoveKills(*MI, DontKill); + } + + if (!IgnoreBr) { + std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; + + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } + } + + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + ToBBI.Predicate.append(Cond.begin(), Cond.end()); + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.IsAnalyzed = false; + + ++NumDupBBs; +} + +/// MergeBlocks - Move all instructions from FromBB to the end of ToBB. +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { + assert(!FromBBI.BB->hasAddressTaken() && + "Removing a BB whose address is taken!"); + + ToBBI.BB->splice(ToBBI.BB->end(), + FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); + + // Force normalizing the successors' probabilities of ToBBI.BB to convert all + // unknown probabilities into known ones. + // FIXME: This usage is too tricky and in the future we would like to + // eliminate all unknown probabilities in MBB. + ToBBI.BB->normalizeSuccProbs(); + + SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr; + // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when + // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB. + auto To2FromProb = BranchProbability::getZero(); + if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) { + To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB); + // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the + // edge probability being merged to other edges when this edge is removed + // later. + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB), + BranchProbability::getZero()); + } + + for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) { + MachineBasicBlock *Succ = FromSuccs[i]; + // Fallthrough edge can't be transferred. 
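+    // (It stays with FromBBI.BB, which is made to fall through to NBB near
+    // the end of this function.)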
+ if (Succ == FallThrough) + continue; + + auto NewProb = BranchProbability::getZero(); + if (AddEdges) { + // Calculate the edge probability for the edge from ToBBI.BB to Succ, + // which is a portion of the edge probability from FromBBI.BB to Succ. The + // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if + // FromBBI is a successor of ToBBI.BB. See comment below for excepion). + NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ); + + // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This + // only happens when if-converting a diamond CFG and FromBBI.BB is the + // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we + // could just use the probabilities on FromBBI.BB's out-edges when adding + // new successors. + if (!To2FromProb.isZero()) + NewProb *= To2FromProb; + } + + FromBBI.BB->removeSuccessor(Succ); + + if (AddEdges) { + // If the edge from ToBBI.BB to Succ already exists, update the + // probability of this edge by adding NewProb to it. An example is shown + // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we + // don't have to set C as A's successor as it already is. We only need to + // update the edge probability on A->C. Note that B will not be + // immediately removed from A's successors. It is possible that B->D is + // not removed either if D is a fallthrough of B. Later the edge A->D + // (generated here) and B->D will be combined into one edge. To maintain + // correct edge probability of this combined edge, we need to set the edge + // probability of A->B to zero, which is already done above. The edge + // probability on A->D is calculated by scaling the original probability + // on A->B by the probability of B->D. + // + // Before ifcvt: After ifcvt (assume B->D is kept): + // + // A A + // /| /|\ + // / B / B| + // | /| | || + // |/ | | |/ + // C D C D + // + if (ToBBI.BB->isSuccessor(Succ)) + ToBBI.BB->setSuccProbability( + std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ), + MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb); + else + ToBBI.BB->addSuccessor(Succ, NewProb); + } + } + + // Now FromBBI always falls through to the next block! + if (NBB && !FromBBI.BB->isSuccessor(NBB)) + FromBBI.BB->addSuccessor(NBB); + + // Normalize the probabilities of ToBBI.BB's successors with all adjustment + // we've done above. + ToBBI.BB->normalizeSuccProbs(); + + ToBBI.Predicate.append(FromBBI.Predicate.begin(), FromBBI.Predicate.end()); + FromBBI.Predicate.clear(); + + ToBBI.NonPredSize += FromBBI.NonPredSize; + ToBBI.ExtraCost += FromBBI.ExtraCost; + ToBBI.ExtraCost2 += FromBBI.ExtraCost2; + FromBBI.NonPredSize = 0; + FromBBI.ExtraCost = 0; + FromBBI.ExtraCost2 = 0; + + ToBBI.ClobbersPred |= FromBBI.ClobbersPred; + ToBBI.HasFallThrough = FromBBI.HasFallThrough; + ToBBI.IsAnalyzed = false; + FromBBI.IsAnalyzed = false; +} + +FunctionPass * +llvm::createIfConverter(std::function<bool(const Function &)> Ftor) { + return new IfConverter(Ftor); +} diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp new file mode 100644 index 0000000..39c1b9f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp @@ -0,0 +1,416 @@ +//===-- ImplicitNullChecks.cpp - Fold null checks into memory accesses ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass turns explicit null checks of the form +// +// test %r10, %r10 +// je throw_npe +// movl (%r10), %esi +// ... +// +// to +// +// faulting_load_op("movl (%r10), %esi", throw_npe) +// ... +// +// With the help of a runtime that understands the .fault_maps section, +// faulting_load_op branches to throw_npe if executing movl (%r10), %esi incurs +// a page fault. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +static cl::opt<unsigned> PageSize("imp-null-check-page-size", + cl::desc("The page size of the target in " + "bytes"), + cl::init(4096)); + +#define DEBUG_TYPE "implicit-null-checks" + +STATISTIC(NumImplicitNullChecks, + "Number of explicit null checks made implicit"); + +namespace { + +class ImplicitNullChecks : public MachineFunctionPass { + /// Represents one null check that can be made implicit. + struct NullCheck { + // The memory operation the null check can be folded into. + MachineInstr *MemOperation; + + // The instruction actually doing the null check (Ptr != 0). + MachineInstr *CheckOperation; + + // The block the check resides in. + MachineBasicBlock *CheckBlock; + + // The block branched to if the pointer is non-null. + MachineBasicBlock *NotNullSucc; + + // The block branched to if the pointer is null. + MachineBasicBlock *NullSucc; + + NullCheck() + : MemOperation(), CheckOperation(), CheckBlock(), NotNullSucc(), + NullSucc() {} + + explicit NullCheck(MachineInstr *memOperation, MachineInstr *checkOperation, + MachineBasicBlock *checkBlock, + MachineBasicBlock *notNullSucc, + MachineBasicBlock *nullSucc) + : MemOperation(memOperation), CheckOperation(checkOperation), + CheckBlock(checkBlock), NotNullSucc(notNullSucc), NullSucc(nullSucc) { + } + }; + + const TargetInstrInfo *TII = nullptr; + const TargetRegisterInfo *TRI = nullptr; + MachineModuleInfo *MMI = nullptr; + + bool analyzeBlockForNullChecks(MachineBasicBlock &MBB, + SmallVectorImpl<NullCheck> &NullCheckList); + MachineInstr *insertFaultingLoad(MachineInstr *LoadMI, MachineBasicBlock *MBB, + MCSymbol *HandlerLabel); + void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList); + +public: + static char ID; + + ImplicitNullChecks() : MachineFunctionPass(ID) { + initializeImplicitNullChecksPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +/// \brief Detect re-ordering hazards and dependencies. +/// +/// This class keeps track of defs and uses, and can be queried if a given +/// machine instruction can be re-ordered from after the machine instructions +/// seen so far to before them. 
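+///
+/// Intended usage (illustrative sketch mirroring the walk done later in this
+/// file; "Insts" stands for whatever instruction range is being scanned):
+/// \code
+///   HazardDetector HD(*TRI);
+///   for (MachineInstr &MI : Insts) {
+///     if (HD.isSafeToHoist(&MI)) {
+///       // MI may be moved above every instruction remembered so far.
+///     }
+///     HD.rememberInstruction(&MI);
+///     if (HD.isClobbered())
+///       break;
+///   }
+/// \endcode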
+class HazardDetector { + DenseSet<unsigned> RegDefs; + DenseSet<unsigned> RegUses; + const TargetRegisterInfo &TRI; + bool hasSeenClobber; + +public: + explicit HazardDetector(const TargetRegisterInfo &TRI) : + TRI(TRI), hasSeenClobber(false) {} + + /// \brief Make a note of \p MI for later queries to isSafeToHoist. + /// + /// May clobber this HazardDetector instance. \see isClobbered. + void rememberInstruction(MachineInstr *MI); + + /// \brief Return true if it is safe to hoist \p MI from after all the + /// instructions seen so far (via rememberInstruction) to before it. + bool isSafeToHoist(MachineInstr *MI); + + /// \brief Return true if this instance of HazardDetector has been clobbered + /// (i.e. has no more useful information). + /// + /// A HazardDetecter is clobbered when it sees a construct it cannot + /// understand, and it would have to return a conservative answer for all + /// future queries. Having a separate clobbered state lets the client code + /// bail early, without making queries about all of the future instructions + /// (which would have returned the most conservative answer anyway). + /// + /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector + /// is an error. + bool isClobbered() { return hasSeenClobber; } +}; +} + + +void HazardDetector::rememberInstruction(MachineInstr *MI) { + assert(!isClobbered() && + "Don't add instructions to a clobbered hazard detector"); + + if (MI->mayStore() || MI->hasUnmodeledSideEffects()) { + hasSeenClobber = true; + return; + } + + for (auto *MMO : MI->memoperands()) { + // Right now we don't want to worry about LLVM's memory model. + if (!MMO->isUnordered()) { + hasSeenClobber = true; + return; + } + } + + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.getReg()) + continue; + + if (MO.isDef()) + RegDefs.insert(MO.getReg()); + else + RegUses.insert(MO.getReg()); + } +} + +bool HazardDetector::isSafeToHoist(MachineInstr *MI) { + assert(!isClobbered() && "isSafeToHoist cannot do anything useful!"); + + // Right now we don't want to worry about LLVM's memory model. This can be + // made more precise later. + for (auto *MMO : MI->memoperands()) + if (!MMO->isUnordered()) + return false; + + for (auto &MO : MI->operands()) { + if (MO.isReg() && MO.getReg()) { + for (unsigned Reg : RegDefs) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-write or read-after-write + + if (MO.isDef()) + for (unsigned Reg : RegUses) + if (TRI.regsOverlap(Reg, MO.getReg())) + return false; // We found a write-after-read + } + } + + return true; +} + +bool ImplicitNullChecks::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getRegInfo().getTargetRegisterInfo(); + MMI = &MF.getMMI(); + + SmallVector<NullCheck, 16> NullCheckList; + + for (auto &MBB : MF) + analyzeBlockForNullChecks(MBB, NullCheckList); + + if (!NullCheckList.empty()) + rewriteNullChecks(NullCheckList); + + return !NullCheckList.empty(); +} + +/// Analyze MBB to check if its terminating branch can be turned into an +/// implicit null check. If yes, append a description of the said null check to +/// NullCheckList and return true, else return false. 
+bool ImplicitNullChecks::analyzeBlockForNullChecks( + MachineBasicBlock &MBB, SmallVectorImpl<NullCheck> &NullCheckList) { + typedef TargetInstrInfo::MachineBranchPredicate MachineBranchPredicate; + + MDNode *BranchMD = nullptr; + if (auto *BB = MBB.getBasicBlock()) + BranchMD = BB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit); + + if (!BranchMD) + return false; + + MachineBranchPredicate MBP; + + if (TII->AnalyzeBranchPredicate(MBB, MBP, true)) + return false; + + // Is the predicate comparing an integer to zero? + if (!(MBP.LHS.isReg() && MBP.RHS.isImm() && MBP.RHS.getImm() == 0 && + (MBP.Predicate == MachineBranchPredicate::PRED_NE || + MBP.Predicate == MachineBranchPredicate::PRED_EQ))) + return false; + + // If we cannot erase the test instruction itself, then making the null check + // implicit does not buy us much. + if (!MBP.SingleUseCondition) + return false; + + MachineBasicBlock *NotNullSucc, *NullSucc; + + if (MBP.Predicate == MachineBranchPredicate::PRED_NE) { + NotNullSucc = MBP.TrueDest; + NullSucc = MBP.FalseDest; + } else { + NotNullSucc = MBP.FalseDest; + NullSucc = MBP.TrueDest; + } + + // We handle the simplest case for now. We can potentially do better by using + // the machine dominator tree. + if (NotNullSucc->pred_size() != 1) + return false; + + // Starting with a code fragment like: + // + // test %RAX, %RAX + // jne LblNotNull + // + // LblNull: + // callq throw_NullPointerException + // + // LblNotNull: + // Inst0 + // Inst1 + // ... + // Def = Load (%RAX + <offset>) + // ... + // + // + // we want to end up with + // + // Def = FaultingLoad (%RAX + <offset>), LblNull + // jmp LblNotNull ;; explicit or fallthrough + // + // LblNotNull: + // Inst0 + // Inst1 + // ... + // + // LblNull: + // callq throw_NullPointerException + // + // + // To see why this is legal, consider the two possibilities: + // + // 1. %RAX is null: since we constrain <offset> to be less than PageSize, the + // load instruction dereferences the null page, causing a segmentation + // fault. + // + // 2. %RAX is not null: in this case we know that the load cannot fault, as + // otherwise the load would've faulted in the original program too and the + // original program would've been undefined. + // + // This reasoning cannot be extended to justify hoisting through arbitrary + // control flow. For instance, in the example below (in pseudo-C) + // + // if (ptr == null) { throw_npe(); unreachable; } + // if (some_cond) { return 42; } + // v = ptr->field; // LD + // ... + // + // we cannot (without code duplication) use the load marked "LD" to null check + // ptr -- clause (2) above does not apply in this case. In the above program + // the safety of ptr->field can be dependent on some_cond; and, for instance, + // ptr could be some non-null invalid reference that never gets loaded from + // because some_cond is always true. 
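+  //
+  // In short: the candidate load below must sit in the block that is only
+  // reached when the null check succeeded, and nothing ahead of it in that
+  // block may write memory or clobber its operands (the HazardDetector
+  // tracks the latter).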
+ + unsigned PointerReg = MBP.LHS.getReg(); + + HazardDetector HD(*TRI); + + for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE; + ++MII) { + MachineInstr *MI = &*MII; + unsigned BaseReg, Offset; + if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI)) + if (MI->mayLoad() && !MI->isPredicable() && BaseReg == PointerReg && + Offset < PageSize && MI->getDesc().getNumDefs() <= 1 && + HD.isSafeToHoist(MI)) { + NullCheckList.emplace_back(MI, MBP.ConditionDef, &MBB, NotNullSucc, + NullSucc); + return true; + } + + HD.rememberInstruction(MI); + if (HD.isClobbered()) + return false; + } + + return false; +} + +/// Wrap a machine load instruction, LoadMI, into a FAULTING_LOAD_OP machine +/// instruction. The FAULTING_LOAD_OP instruction does the same load as LoadMI +/// (defining the same register), and branches to HandlerLabel if the load +/// faults. The FAULTING_LOAD_OP instruction is inserted at the end of MBB. +MachineInstr *ImplicitNullChecks::insertFaultingLoad(MachineInstr *LoadMI, + MachineBasicBlock *MBB, + MCSymbol *HandlerLabel) { + const unsigned NoRegister = 0; // Guaranteed to be the NoRegister value for + // all targets. + + DebugLoc DL; + unsigned NumDefs = LoadMI->getDesc().getNumDefs(); + assert(NumDefs <= 1 && "other cases unhandled!"); + + unsigned DefReg = NoRegister; + if (NumDefs != 0) { + DefReg = LoadMI->defs().begin()->getReg(); + assert(std::distance(LoadMI->defs().begin(), LoadMI->defs().end()) == 1 && + "expected exactly one def!"); + } + + auto MIB = BuildMI(MBB, DL, TII->get(TargetOpcode::FAULTING_LOAD_OP), DefReg) + .addSym(HandlerLabel) + .addImm(LoadMI->getOpcode()); + + for (auto &MO : LoadMI->uses()) + MIB.addOperand(MO); + + MIB.setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end()); + + return MIB; +} + +/// Rewrite the null checks in NullCheckList into implicit null checks. +void ImplicitNullChecks::rewriteNullChecks( + ArrayRef<ImplicitNullChecks::NullCheck> NullCheckList) { + DebugLoc DL; + + for (auto &NC : NullCheckList) { + MCSymbol *HandlerLabel = MMI->getContext().createTempSymbol(); + + // Remove the conditional branch dependent on the null check. + unsigned BranchesRemoved = TII->RemoveBranch(*NC.CheckBlock); + (void)BranchesRemoved; + assert(BranchesRemoved > 0 && "expected at least one branch!"); + + // Insert a faulting load where the conditional branch was originally. We + // check earlier ensures that this bit of code motion is legal. We do not + // touch the successors list for any basic block since we haven't changed + // control flow, we've just made it implicit. + insertFaultingLoad(NC.MemOperation, NC.CheckBlock, HandlerLabel); + NC.MemOperation->eraseFromParent(); + NC.CheckOperation->eraseFromParent(); + + // Insert an *unconditional* branch to not-null successor. + TII->InsertBranch(*NC.CheckBlock, NC.NotNullSucc, nullptr, /*Cond=*/None, + DL); + + // Emit the HandlerLabel as an EH_LABEL. 
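+    // This marks the start of the null handler block; the FAULTING_LOAD_OP
+    // emitted above refers to it via the symbol, so the runtime knows where
+    // to resume if the load faults.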
+ BuildMI(*NC.NullSucc, NC.NullSucc->begin(), DL, + TII->get(TargetOpcode::EH_LABEL)).addSym(HandlerLabel); + + NumImplicitNullChecks++; + } +} + +char ImplicitNullChecks::ID = 0; +char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID; +INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks", + "Implicit null checks", false, false) +INITIALIZE_PASS_END(ImplicitNullChecks, "implicit-null-checks", + "Implicit null checks", false, false) diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 0000000..e310132 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,1390 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#include "Spiller.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumSpilledRanges, "Number of spilled live ranges"); +STATISTIC(NumSnippets, "Number of spilled snippets"); +STATISTIC(NumSpills, "Number of spills inserted"); +STATISTIC(NumSpillsRemoved, "Number of spills removed"); +STATISTIC(NumReloads, "Number of reloads inserted"); +STATISTIC(NumReloadsRemoved, "Number of reloads removed"); +STATISTIC(NumFolded, "Number of folded stack accesses"); +STATISTIC(NumFoldedLoads, "Number of folded loads"); +STATISTIC(NumRemats, "Number of rematerialized defs for spilling"); +STATISTIC(NumOmitReloadSpill, "Number of omitted spills of reloads"); +STATISTIC(NumHoists, "Number of hoisted spills"); + +static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden, + cl::desc("Disable inline spill hoisting")); + +namespace { +class InlineSpiller : public Spiller { + MachineFunction &MF; + LiveIntervals &LIS; + LiveStacks &LSS; + AliasAnalysis *AA; + MachineDominatorTree &MDT; + MachineLoopInfo &Loops; + VirtRegMap &VRM; + MachineFrameInfo &MFI; + MachineRegisterInfo &MRI; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; + + // Variables that are valid during spill(), but used by multiple methods. 
+ LiveRangeEdit *Edit; + LiveInterval *StackInt; + int StackSlot; + unsigned Original; + + // All registers to spill to StackSlot, including the main register. + SmallVector<unsigned, 8> RegsToSpill; + + // All COPY instructions to/from snippets. + // They are ignored since both operands refer to the same stack slot. + SmallPtrSet<MachineInstr*, 8> SnippetCopies; + + // Values that failed to remat at some point. + SmallPtrSet<VNInfo*, 8> UsedValues; + +public: + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // True when value is defined by an original PHI not from splitting. + bool DefByOrigPHI; + + // True when the COPY defining this value killed its source. + bool KillsSource; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // The block where SpillVNI should be spilled. Currently, this must be the + // block containing SpillVNI->def. + MachineBasicBlock *SpillMBB; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + // List of values that depend on this one. These values are actually the + // same, but live range splitting has placed them in different registers, + // or SSA update needed to insert PHI-defs to preserve SSA form. This is + // copies of the current value and phi-kills. Usually only phi-kills cause + // more than one dependent value. + TinyPtrVector<VNInfo*> Deps; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(true), DefByOrigPHI(false), KillsSource(false), + SpillReg(Reg), SpillVNI(VNI), SpillMBB(nullptr), DefMI(nullptr) {} + + // Returns true when a def has been found. + bool hasDef() const { return DefByOrigPHI || DefMI; } + }; + +private: + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap<VNInfo*, SibValueInfo> SibValueMap; + SibValueMap SibValues; + + // Dead defs generated during spilling. 
+ SmallVector<MachineInstr*, 8> DeadDefs; + + ~InlineSpiller() override {} + +public: + InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) + : MF(mf), LIS(pass.getAnalysis<LiveIntervals>()), + LSS(pass.getAnalysis<LiveStacks>()), + AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()), + MDT(pass.getAnalysis<MachineDominatorTree>()), + Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm), + MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), + TII(*mf.getSubtarget().getInstrInfo()), + TRI(*mf.getSubtarget().getRegisterInfo()), + MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()) {} + + void spill(LiveRangeEdit &) override; + +private: + bool isSnippet(const LiveInterval &SnipLI); + void collectRegsToSpill(); + + bool isRegToSpill(unsigned Reg) { + return std::find(RegsToSpill.begin(), + RegsToSpill.end(), Reg) != RegsToSpill.end(); + } + + bool isSibling(unsigned Reg); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void propagateSiblingValue(SibValueMap::iterator, VNInfo *VNI = nullptr); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); + + void markValueUsed(LiveInterval*, VNInfo*); + bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); + bool foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> >, + MachineInstr *LoadMI = nullptr); + void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); + + void spillAroundUses(unsigned Reg); + void spillAll(); +}; +} + +namespace llvm { + +Spiller::~Spiller() { } +void Spiller::anchor() { } + +Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm) { + return new InlineSpiller(pass, mf, vrm); +} + +} + +//===----------------------------------------------------------------------===// +// Snippets +//===----------------------------------------------------------------------===// + +// When spilling a virtual register, we also spill any snippets it is connected +// to. The snippets are small live ranges that only have a single real use, +// leftovers from live range splitting. Spilling them enables memory operand +// folding or tightens the live range around the single use. +// +// This minimizes register pressure and maximizes the store-to-load distance for +// spill slots which can be important in tight loops. + +/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, +/// otherwise return 0. +static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { + if (!MI->isFullCopy()) + return 0; + if (MI->getOperand(0).getReg() == Reg) + return MI->getOperand(1).getReg(); + if (MI->getOperand(1).getReg() == Reg) + return MI->getOperand(0).getReg(); + return 0; +} + +/// isSnippet - Identify if a live interval is a snippet that should be spilled. +/// It is assumed that SnipLI is a virtual register with the same original as +/// Edit->getReg(). +bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { + unsigned Reg = Edit->getReg(); + + // A snippet is a tiny live range with only a single instruction using it + // besides copies to/from Reg or spills/fills. 
We accept: + // + // %snip = COPY %Reg / FILL fi# + // %snip = USE %snip + // %Reg = COPY %snip / SPILL %snip, fi# + // + if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + return false; + + MachineInstr *UseMI = nullptr; + + // Check that all uses satisfy our criteria. + for (MachineRegisterInfo::reg_instr_nodbg_iterator + RI = MRI.reg_instr_nodbg_begin(SnipLI.reg), + E = MRI.reg_instr_nodbg_end(); RI != E; ) { + MachineInstr *MI = &*(RI++); + + // Allow copies to/from Reg. + if (isFullCopyOf(MI, Reg)) + continue; + + // Allow stack slot loads. + int FI; + if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow stack slot stores. + if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow a single additional instruction. + if (UseMI && MI != UseMI) + return false; + UseMI = MI; + } + return true; +} + +/// collectRegsToSpill - Collect live range snippets that only have a single +/// real use. +void InlineSpiller::collectRegsToSpill() { + unsigned Reg = Edit->getReg(); + + // Main register always spills. + RegsToSpill.assign(1, Reg); + SnippetCopies.clear(); + + // Snippets all have the same original, so there can't be any for an original + // register. + if (Original == Reg) + return; + + for (MachineRegisterInfo::reg_instr_iterator + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); RI != E; ) { + MachineInstr *MI = &*(RI++); + unsigned SnipReg = isFullCopyOf(MI, Reg); + if (!isSibling(SnipReg)) + continue; + LiveInterval &SnipLI = LIS.getInterval(SnipReg); + if (!isSnippet(SnipLI)) + continue; + SnippetCopies.insert(MI); + if (isRegToSpill(SnipReg)) + continue; + RegsToSpill.push_back(SnipReg); + DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + ++NumSnippets; + } +} + + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + +bool InlineSpiller::isSibling(unsigned Reg) { + return TargetRegisterInfo::isVirtualRegister(Reg) && + VRM.getOriginal(Reg) == Original; +} + +#ifndef NDEBUG +static raw_ostream &operator<<(raw_ostream &OS, + const InlineSpiller::SibValueInfo &SVI) { + OS << "spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def; + if (SVI.SpillMBB) + OS << " in BB#" << SVI.SpillMBB->getNumber(); + if (SVI.AllDefsAreReloads) + OS << " all-reloads"; + if (SVI.DefByOrigPHI) + OS << " orig-phi"; + if (SVI.KillsSource) + OS << " kill"; + OS << " deps["; + for (VNInfo *Dep : SVI.Deps) + OS << ' ' << Dep->id << '@' << Dep->def; + OS << " ]"; + if (SVI.DefMI) + OS << " def: " << *SVI.DefMI; + else + OS << '\n'; + return OS; +} +#endif + +/// propagateSiblingValue - Propagate the value in SVI to dependents if it is +/// known. Otherwise remember the dependency for later. +/// +/// @param SVIIter SibValues entry to propagate. +/// @param VNI Dependent value, or NULL to propagate to all saved dependents. 
+void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, + VNInfo *VNI) { + SibValueMap::value_type *SVI = &*SVIIter; + + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. + TinyPtrVector<VNInfo*> FirstDeps; + if (VNI) { + FirstDeps.push_back(VNI); + SVI->second.Deps.push_back(VNI); + } + + // Has the value been completely determined yet? If not, defer propagation. + if (!SVI->second.hasDef()) + return; + + // Work list of values to propagate. + SmallSetVector<SibValueMap::value_type *, 8> WorkList; + WorkList.insert(SVI); + + do { + SVI = WorkList.pop_back_val(); + TinyPtrVector<VNInfo*> *Deps = VNI ? &FirstDeps : &SVI->second.Deps; + VNI = nullptr; + + SibValueInfo &SV = SVI->second; + if (!SV.SpillMBB) + SV.SpillMBB = LIS.getMBBFromIndex(SV.SpillVNI->def); + + DEBUG(dbgs() << " prop to " << Deps->size() << ": " + << SVI->first->id << '@' << SVI->first->def << ":\t" << SV); + + assert(SV.hasDef() && "Propagating undefined value"); + + // Should this value be propagated as a preferred spill candidate? We don't + // propagate values of registers that are about to spill. + bool PropSpill = !DisableHoisting && !isRegToSpill(SV.SpillReg); + unsigned SpillDepth = ~0u; + + for (VNInfo *Dep : *Deps) { + SibValueMap::iterator DepSVI = SibValues.find(Dep); + assert(DepSVI != SibValues.end() && "Dependent value not in SibValues"); + SibValueInfo &DepSV = DepSVI->second; + if (!DepSV.SpillMBB) + DepSV.SpillMBB = LIS.getMBBFromIndex(DepSV.SpillVNI->def); + + bool Changed = false; + + // Propagate defining instruction. + if (!DepSV.hasDef()) { + Changed = true; + DepSV.DefMI = SV.DefMI; + DepSV.DefByOrigPHI = SV.DefByOrigPHI; + } + + // Propagate AllDefsAreReloads. For PHI values, this computes an AND of + // all predecessors. + if (!SV.AllDefsAreReloads && DepSV.AllDefsAreReloads) { + Changed = true; + DepSV.AllDefsAreReloads = false; + } + + // Propagate best spill value. + if (PropSpill && SV.SpillVNI != DepSV.SpillVNI) { + if (SV.SpillMBB == DepSV.SpillMBB) { + // DepSV is in the same block. Hoist when dominated. + if (DepSV.KillsSource && SV.SpillVNI->def < DepSV.SpillVNI->def) { + // This is an alternative def earlier in the same MBB. + // Hoist the spill as far as possible in SpillMBB. This can ease + // register pressure: + // + // x = def + // y = use x + // s = copy x + // + // Hoisting the spill of s to immediately after the def removes the + // interference between x and y: + // + // x = def + // spill x + // y = use x<kill> + // + // This hoist only helps when the DepSV copy kills its source. + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } else { + // DepSV is in a different block. + if (SpillDepth == ~0u) + SpillDepth = Loops.getLoopDepth(SV.SpillMBB); + + // Also hoist spills to blocks with smaller loop depth, but make sure + // that the new value dominates. Non-phi dependents are always + // dominated, phis need checking. + + const BranchProbability MarginProb(4, 5); // 80% + // Hoist a spill to outer loop if there are multiple dependents (it + // can be beneficial if more than one dependents are hoisted) or + // if DepSV (the hoisting source) is hotter than SV (the hoisting + // destination) (we add a 80% margin to bias a little towards + // loop depth). 
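+          // i.e. hoist when
+          //   freq(DepSV.SpillMBB) >= 0.8 * freq(SV.SpillMBB)
+          // or when more than one dependent stands to benefit.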
+ bool HoistCondition = + (MBFI.getBlockFreq(DepSV.SpillMBB) >= + (MBFI.getBlockFreq(SV.SpillMBB) * MarginProb)) || + Deps->size() > 1; + + if ((Loops.getLoopDepth(DepSV.SpillMBB) > SpillDepth) && + HoistCondition && + (!DepSVI->first->isPHIDef() || + MDT.dominates(SV.SpillMBB, DepSV.SpillMBB))) { + Changed = true; + DepSV.SpillReg = SV.SpillReg; + DepSV.SpillVNI = SV.SpillVNI; + DepSV.SpillMBB = SV.SpillMBB; + } + } + } + + if (!Changed) + continue; + + // Something changed in DepSVI. Propagate to dependents. + WorkList.insert(&*DepSVI); + + DEBUG(dbgs() << " update " << DepSVI->first->id << '@' + << DepSVI->first->def << " to:\t" << DepSV); + } + } while (!WorkList.empty()); +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. +/// +/// Determine if the value is defined by all reloads, so spilling isn't +/// necessary - the value is already in the stack slot. +/// +/// Return a defining instruction that may be a candidate for rematerialization. +/// +MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, + VNInfo *OrigVNI) { + // Check if a cached value already exists. + SibValueMap::iterator SVI; + bool Inserted; + std::tie(SVI, Inserted) = + SibValues.insert(std::make_pair(UseVNI, SibValueInfo(UseReg, UseVNI))); + if (!Inserted) { + DEBUG(dbgs() << "Cached value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << ' ' << SVI->second); + return SVI->second.DefMI; + } + + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << '\n'); + + // List of (Reg, VNI) that have been inserted into SibValues, but need to be + // processed. + SmallVector<std::pair<unsigned, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(UseReg, UseVNI)); + + LiveInterval &OrigLI = LIS.getInterval(Original); + do { + unsigned Reg; + VNInfo *VNI; + std::tie(Reg, VNI) = WorkList.pop_back_val(); + DEBUG(dbgs() << " " << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def + << ":\t"); + + // First check if this value has already been computed. + SVI = SibValues.find(VNI); + assert(SVI != SibValues.end() && "Missing SibValues entry"); + + // Trace through PHI-defs created by live range splitting. + if (VNI->isPHIDef()) { + // Stop at original PHIs. We don't know the value at the + // predecessors. Look up the VNInfo for the current definition + // in OrigLI, to properly determine whether or not this phi was + // added by splitting. + if (VNI->def == OrigLI.getVNInfoAt(VNI->def)->def) { + DEBUG(dbgs() << "orig phi value\n"); + SVI->second.DefByOrigPHI = true; + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); + continue; + } + + // This is a PHI inserted by live range splitting. We could trace the + // live-out value from predecessor blocks, but that search can be very + // expensive if there are many predecessors and many more PHIs as + // generated by tail-dup when it sees an indirectbr. Instead, look at + // all the non-PHI defs that have the same value as OrigVNI. They must + // jointly dominate VNI->def. This is not optimal since VNI may actually + // be jointly dominated by a smaller subset of defs, so there is a change + // we will miss a AllDefsAreReloads optimization. + + // Separate all values dominated by OrigVNI into PHIs and non-PHIs. 
+ SmallVector<VNInfo*, 8> PHIs, NonPHIs; + LiveInterval &LI = LIS.getInterval(Reg); + + for (LiveInterval::vni_iterator VI = LI.vni_begin(), VE = LI.vni_end(); + VI != VE; ++VI) { + VNInfo *VNI2 = *VI; + if (VNI2->isUnused()) + continue; + if (!OrigLI.containsOneValue() && + OrigLI.getVNInfoAt(VNI2->def) != OrigVNI) + continue; + if (VNI2->isPHIDef() && VNI2->def != OrigVNI->def) + PHIs.push_back(VNI2); + else + NonPHIs.push_back(VNI2); + } + DEBUG(dbgs() << "split phi value, checking " << PHIs.size() + << " phi-defs, and " << NonPHIs.size() + << " non-phi/orig defs\n"); + + // Create entries for all the PHIs. Don't add them to the worklist, we + // are processing all of them in one go here. + for (VNInfo *PHI : PHIs) + SibValues.insert(std::make_pair(PHI, SibValueInfo(Reg, PHI))); + + // Add every PHI as a dependent of all the non-PHIs. + for (VNInfo *NonPHI : NonPHIs) { + // Known value? Try an insertion. + std::tie(SVI, Inserted) = + SibValues.insert(std::make_pair(NonPHI, SibValueInfo(Reg, NonPHI))); + // Add all the PHIs as dependents of NonPHI. + SVI->second.Deps.insert(SVI->second.Deps.end(), PHIs.begin(), + PHIs.end()); + // This is the first time we see NonPHI, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(Reg, NonPHI)); + else + // Propagate to all inserted PHIs, not just VNI. + propagateSiblingValue(SVI); + } + + // Next work list item. + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); + assert(SrcQ.valueIn() && "Copy from non-existing value"); + // Check if this COPY kills its source. + SVI->second.KillsSource = SrcQ.isKill(); + VNInfo *SrcVNI = SrcQ.valueIn(); + DEBUG(dbgs() << "copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def + << " kill=" << unsigned(SVI->second.KillsSource) << '\n'); + // Known sibling source value? Try an insertion. + std::tie(SVI, Inserted) = SibValues.insert( + std::make_pair(SrcVNI, SibValueInfo(SrcReg, SrcVNI))); + // This is the first time we see Src, add it to the worklist. + if (Inserted) + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + propagateSiblingValue(SVI, VNI); + // Next work list item. + continue; + } + } + + // Track reachable reloads. + SVI->second.DefMI = MI; + SVI->second.SpillMBB = MI->getParent(); + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "reload\n"); + propagateSiblingValue(SVI); + // Next work list item. + continue; + } + + // Potential remat candidate. + DEBUG(dbgs() << "def " << *MI); + SVI->second.AllDefsAreReloads = false; + propagateSiblingValue(SVI); + } while (!WorkList.empty()); + + // Look up the value we were looking for. We already did this lookup at the + // top of the function, but SibValues may have been invalidated. + SVI = SibValues.find(UseVNI); + assert(SVI != SibValues.end() && "Didn't compute requested info"); + DEBUG(dbgs() << " traced to:\t" << SVI->second); + return SVI->second.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. +/// +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? 
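+  // (If the register being spilled is still the original one, live range
+  // splitting never created sibling registers, so there is nothing to trace.)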
+ if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned Reg : RegsToSpill) { + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = nullptr; + if (!VNI->isPHIDef()) { + DefMI = LIS.getInstructionFromIndex(VNI->def); + assert(DefMI && "No defining instruction"); + } + // Check possible sibling copies. + if (VNI->isPHIDef() || DefMI->isCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + assert(OrigVNI && "Def outside original live range"); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (DefMI && Edit->checkRematerializable(VNI, DefMI, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. +bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { + SlotIndex Idx = LIS.getInstructionIndex(CopyMI); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + return false; + + // SpillReg may have been deleted by remat and DCE. + if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + // Conservatively extend the stack slot range to the range of the original + // value. We may be able to do better with stack slot coloring by being more + // careful here. + assert(StackInt && "No stack slot assigned yet."); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); + + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) { + DEBUG(dbgs() << "\tno spill needed: " << SVI); + ++NumOmitReloadSpill; + return true; + } + // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // any later spills of the same value. + eliminateRedundantSpills(SibLI, SVI.SpillVNI); + + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock::iterator MII; + if (SVI.SpillVNI->isPHIDef()) + MII = MBB->SkipPHIsAndLabels(MBB->begin()); + else { + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; + ++MII; + } + // Insert spill without kill flag immediately after def. + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); + --MII; // Point to store instruction. 
+ LIS.InsertMachineInstrInMaps(MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + + ++NumSpills; + ++NumHoists; + return true; +} + +/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any +/// redundant spills of this value in SLI.reg and sibling copies. +void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { + assert(VNI && "Missing value"); + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(&SLI, VNI)); + assert(StackInt && "No stack slot assigned yet."); + + do { + LiveInterval *LI; + std::tie(LI, VNI) = WorkList.pop_back_val(); + unsigned Reg = LI->reg; + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + + // Regs to spill are taken care of. + if (isRegToSpill(Reg)) + continue; + + // Add all of VNI's live range to StackInt. + StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + + // Find all spills and copies of VNI. + for (MachineRegisterInfo::use_instr_nodbg_iterator + UI = MRI.use_instr_nodbg_begin(Reg), E = MRI.use_instr_nodbg_end(); + UI != E; ) { + MachineInstr *MI = &*(UI++); + if (!MI->isCopy() && !MI->mayStore()) + continue; + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (LI->getVNInfoAt(Idx) != VNI) + continue; + + // Follow sibling copies down the dominator tree. + if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (isSibling(DstReg)) { + LiveInterval &DstLI = LIS.getInterval(DstReg); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && "Missing defined value"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); + WorkList.push_back(std::make_pair(&DstLI, DstVNI)); + } + continue; + } + + // Erase spills. + int FI; + if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + // eliminateDeadDefs won't normally remove stores, so switch opcode. + MI->setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(MI); + ++NumSpillsRemoved; + --NumSpills; + } + } + } while (!WorkList.empty()); +} + + +//===----------------------------------------------------------------------===// +// Rematerialization +//===----------------------------------------------------------------------===// + +/// markValueUsed - Remember that VNI failed to rematerialize, so its defining +/// instruction cannot be eliminated. See through snippet copies +void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { + SmallVector<std::pair<LiveInterval*, VNInfo*>, 8> WorkList; + WorkList.push_back(std::make_pair(LI, VNI)); + do { + std::tie(LI, VNI) = WorkList.pop_back_val(); + if (!UsedValues.insert(VNI).second) + continue; + + if (VNI->isPHIDef()) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock *P : MBB->predecessors()) { + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(P)); + if (PVNI) + WorkList.push_back(std::make_pair(LI, PVNI)); + } + continue; + } + + // Follow snippet copies. 
+ MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def);
+ if (!SnippetCopies.count(MI))
+ continue;
+ LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg());
+ assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy");
+ VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true));
+ assert(SnipVNI && "Snippet undefined before copy");
+ WorkList.push_back(std::make_pair(&SnipLI, SnipVNI));
+ } while (!WorkList.empty());
+}
+
+/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg,
+ MachineBasicBlock::iterator MI) {
+
+ // Analyze instruction
+ SmallVector<std::pair<MachineInstr *, unsigned>, 8> Ops;
+ MIBundleOperands::VirtRegInfo RI =
+ MIBundleOperands(MI).analyzeVirtReg(VirtReg.reg, &Ops);
+
+ if (!RI.Reads)
+ return false;
+
+ SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true);
+ VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex());
+
+ if (!ParentVNI) {
+ DEBUG(dbgs() << "\tadding <undef> flags: ");
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg)
+ MO.setIsUndef();
+ }
+ DEBUG(dbgs() << UseIdx << '\t' << *MI);
+ return true;
+ }
+
+ if (SnippetCopies.count(MI))
+ return false;
+
+ // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ SibValueMap::const_iterator SibI = SibValues.find(ParentVNI);
+ if (SibI != SibValues.end())
+ RM.OrigMI = SibI->second.DefMI;
+ if (!Edit->canRematerializeAt(RM, UseIdx, false)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // If the instruction also writes VirtReg.reg, it had better not require the
+ // same register for uses and defs.
+ if (RI.Tied) {
+ markValueUsed(&VirtReg, ParentVNI);
+ DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI);
+ return false;
+ }
+
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->canFoldAsLoad() &&
+ foldMemoryOperand(Ops, RM.OrigMI)) {
+ Edit->markRematerialized(RM.ParentVNI);
+ ++NumFoldedLoads;
+ return true;
+ }
+
+ // Allocate a new register for the remat.
+ unsigned NewVReg = Edit->createFrom(Original);
+
+ // Finally we can rematerialize OrigMI before MI.
+ SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM,
+ TRI);
+ (void)DefIdx;
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *LIS.getInstructionFromIndex(DefIdx));
+
+ // Replace operands
+ for (const auto &OpPair : Ops) {
+ MachineOperand &MO = OpPair.first->getOperand(OpPair.second);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) {
+ MO.setReg(NewVReg);
+ MO.setIsKill();
+ }
+ }
+ DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n');
+
+ ++NumRemats;
+ return true;
+}
+
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
+/// and trim the live ranges after.
+void InlineSpiller::reMaterializeAll() {
+ // analyzeSiblingValues has already tested all relevant defining instructions.
+ if (!Edit->anyRematerializable(AA))
+ return;
+
+ UsedValues.clear();
+
+ // Try to remat before all uses of snippets.
+ bool anyRemat = false; + for (unsigned Reg : RegsToSpill) { + LiveInterval &LI = LIS.getInterval(Reg); + for (MachineRegisterInfo::reg_bundle_iterator + RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); + RegI != E; ) { + MachineInstr *MI = &*(RegI++); + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) + continue; + + anyRemat |= reMaterializeFor(LI, MI); + } + } + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + for (unsigned Reg : RegsToSpill) { + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + MI->addRegisterDead(Reg, &TRI); + if (!MI->allDefsAreDead()) + continue; + DEBUG(dbgs() << "All defs dead: " << *MI); + DeadDefs.push_back(MI); + } + } + + // Eliminate dead code after remat. Note that some snippet copies may be + // deleted here. + if (DeadDefs.empty()) + return; + DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); + + // Get rid of deleted and empty intervals. + unsigned ResultPos = 0; + for (unsigned Reg : RegsToSpill) { + if (!LIS.hasInterval(Reg)) + continue; + + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.empty()) { + Edit->eraseVirtReg(Reg); + continue; + } + + RegsToSpill[ResultPos++] = Reg; + } + RegsToSpill.erase(RegsToSpill.begin() + ResultPos, RegsToSpill.end()); + DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); +} + + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// If MI is a load or store of StackSlot, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { + int FI = 0; + unsigned InstrReg = TII.isLoadFromStackSlot(MI, FI); + bool IsLoad = InstrReg; + if (!IsLoad) + InstrReg = TII.isStoreToStackSlot(MI, FI); + + // We have a stack access. Is it the right register and slot? + if (InstrReg != Reg || FI != StackSlot) + return false; + + DEBUG(dbgs() << "Coalescing stack access: " << *MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + + if (IsLoad) { + ++NumReloadsRemoved; + --NumReloads; + } else { + ++NumSpillsRemoved; + --NumSpills; + } + + return true; +} + +#if !defined(NDEBUG) +// Dump the range of instructions from B to E with their slot indexes. +static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E, + LiveIntervals const &LIS, + const char *const header, + unsigned VReg =0) { + char NextLine = '\n'; + char SlotIndent = '\t'; + + if (std::next(B) == E) { + NextLine = ' '; + SlotIndent = ' '; + } + + dbgs() << '\t' << header << ": " << NextLine; + + for (MachineBasicBlock::iterator I = B; I != E; ++I) { + SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + + // If a register was passed in and this instruction has it as a + // destination that is marked as an early clobber, print the + // early-clobber slot index. 
+ if (VReg) { + MachineOperand *MO = I->findRegisterDefOperand(VReg); + if (MO && MO->isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + + dbgs() << SlotIndent << Idx << '\t' << *I; + } +} +#endif + +/// foldMemoryOperand - Try folding stack slot references in Ops into their +/// instructions. +/// +/// @param Ops Operand indices from analyzeVirtReg(). +/// @param LoadMI Load instruction to use instead of stack slot when non-null. +/// @return True on success. +bool InlineSpiller:: +foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops, + MachineInstr *LoadMI) { + if (Ops.empty()) + return false; + // Don't attempt folding in bundles. + MachineInstr *MI = Ops.front().first; + if (Ops.back().first != MI || MI->isBundled()) + return false; + + bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP); + + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector<unsigned, 8> FoldOps; + for (const auto &OpPair : Ops) { + unsigned Idx = OpPair.second; + assert(MI == OpPair.first && "Instruction conflict during operand folding"); + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) { + ImpReg = MO.getReg(); + continue; + } + // FIXME: Teach targets to deal with subregs. + if (!SpillSubRegs && MO.getSubReg()) + return false; + // We cannot fold a load instruction into a def. + if (LoadMI && MO.isDef()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstrSpan MIS(MI); + + MachineInstr *FoldMI = + LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) + : TII.foldMemoryOperand(MI, FoldOps, StackSlot); + if (!FoldMI) + return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + // Skip non-Defs, including undef uses and internal reads. + if (MO->isUse()) + continue; + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (RI.FullyDefined) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + LIS.removePhysRegDefAt(Reg, Idx); + } + + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); + MI->eraseFromParent(); + + // Insert any new instructions other than FoldMI into the LIS maps. + assert(!MIS.empty() && "Unexpected empty span of instructions!"); + for (MachineInstr &MI : MIS) + if (&MI != FoldMI) + LIS.InsertMachineInstrInMaps(&MI); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. 
+ if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); + + if (!WasCopy) + ++NumFolded; + else if (Ops.front().second == 0) + ++NumSpills; + else + ++NumReloads; + return true; +} + +void InlineSpiller::insertReload(unsigned NewVReg, + SlotIndex Idx, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + + MachineInstrSpan MIS(MI); + TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); + ++NumReloads; +} + +/// insertSpill - Insert a spill of NewVReg after MI. +void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + + MachineInstrSpan MIS(MI); + TII.storeRegToStackSlot(MBB, std::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(std::next(MI), MIS.end()); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(std::next(MI), MIS.end(), LIS, + "spill")); + ++NumSpills; +} + +/// spillAroundUses - insert spill code around each use of Reg. +void InlineSpiller::spillAroundUses(unsigned Reg) { + DEBUG(dbgs() << "spillAroundUses " << PrintReg(Reg) << '\n'); + LiveInterval &OldLI = LIS.getInterval(Reg); + + // Iterate over instructions using Reg. + for (MachineRegisterInfo::reg_bundle_iterator + RegI = MRI.reg_bundle_begin(Reg), E = MRI.reg_bundle_end(); + RegI != E; ) { + MachineInstr *MI = &*(RegI++); + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + continue; + } + + // Ignore copies to/from snippets. We'll delete them. + if (SnippetCopies.count(MI)) + continue; + + // Stack slot accesses may coalesce away. + if (coalesceStackAccess(MI, Reg)) + continue; + + // Analyze instruction. + SmallVector<std::pair<MachineInstr*, unsigned>, 8> Ops; + MIBundleOperands::VirtRegInfo RI = + MIBundleOperands(MI).analyzeVirtReg(Reg, &Ops); + + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + + // Check for a sibling copy. + unsigned SibReg = isFullCopyOf(MI, Reg); + if (SibReg && isSibling(SibReg)) { + // This may actually be a copy between snippets. 
+ if (isRegToSpill(SibReg)) { + DEBUG(dbgs() << "Found new snippet copy: " << *MI); + SnippetCopies.insert(MI); + continue; + } + if (RI.Writes) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, MI)) { + // This COPY is now dead, the value is already in the stack slot. + MI->getOperand(0).setIsDead(); + DeadDefs.push_back(MI); + continue; + } + } else { + // This is a reload for a sib-reg copy. Drop spills downstream. + LiveInterval &SibLI = LIS.getInterval(SibReg); + eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); + // The COPY will fold to a reload below. + } + } + + // Attempt to fold memory ops. + if (foldMemoryOperand(Ops)) + continue; + + // Create a new virtual register for spill/fill. + // FIXME: Infer regclass from instruction alone. + unsigned NewVReg = Edit->createFrom(Reg); + + if (RI.Reads) + insertReload(NewVReg, Idx, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (const auto &OpPair : Ops) { + MachineOperand &MO = OpPair.first->getOperand(OpPair.second); + MO.setReg(NewVReg); + if (MO.isUse()) { + if (!OpPair.first->isRegTiedToDefOperand(OpPair.second)) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); + + // FIXME: Use a second vreg if instruction has no tied ops. + if (RI.Writes) + if (hasLiveDef) + insertSpill(NewVReg, true, MI); + } +} + +/// spillAll - Spill all registers remaining after rematerialization. +void InlineSpiller::spillAll() { + // Update LiveStacks now that we are committed to spilling. + if (StackSlot == VirtRegMap::NO_STACK_SLOT) { + StackSlot = VRM.assignVirt2StackSlot(Original); + StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); + StackInt->getNextValue(SlotIndex(), LSS.getVNInfoAllocator()); + } else + StackInt = &LSS.getInterval(StackSlot); + + if (Original != Edit->getReg()) + VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); + + assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); + for (unsigned Reg : RegsToSpill) + StackInt->MergeSegmentsInAsValue(LIS.getInterval(Reg), + StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + + // Spill around uses of all RegsToSpill. + for (unsigned Reg : RegsToSpill) + spillAroundUses(Reg); + + // Hoisted spills may cause dead code. + if (!DeadDefs.empty()) { + DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + Edit->eliminateDeadDefs(DeadDefs, RegsToSpill); + } + + // Finally delete the SnippetCopies. + for (unsigned Reg : RegsToSpill) { + for (MachineRegisterInfo::reg_instr_iterator + RI = MRI.reg_instr_begin(Reg), E = MRI.reg_instr_end(); + RI != E; ) { + MachineInstr *MI = &*(RI++); + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + } + + // Delete all spilled registers. + for (unsigned Reg : RegsToSpill) + Edit->eraseVirtReg(Reg); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + ++NumSpilledRanges; + Edit = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + // Share a stack slot among all descendants of Original. 
+ Original = VRM.getOriginal(edit.getReg());
+ StackSlot = VRM.getStackSlot(Original);
+ StackInt = nullptr;
+
+ DEBUG(dbgs() << "Inline spilling "
+ << TRI.getRegClassName(MRI.getRegClass(edit.getReg()))
+ << ':' << edit.getParent()
+ << "\nFrom original " << PrintReg(Original) << '\n');
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
+ assert(DeadDefs.empty() && "Previous spill didn't remove dead defs");
+
+ collectRegsToSpill();
+ analyzeSiblingValues();
+ reMaterializeAll();
+
+ // Remat may handle everything.
+ if (!RegsToSpill.empty())
+ spillAll();
+
+ Edit->calculateRegClassAndHint(MF, Loops, MBFI);
+}
diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
new file mode 100644
index 0000000..f8cc247
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp
@@ -0,0 +1,250 @@
+//===-- InterferenceCache.cpp - Caching per-block interference ---------*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// InterferenceCache remembers per-block interference in LiveIntervalUnions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "InterferenceCache.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "regalloc"
+
+// Static member used for null interference cursors.
+const InterferenceCache::BlockInterference
+ InterferenceCache::Cursor::NoInterference;
+
+// Initializes PhysRegEntries (instead of a SmallVector, PhysRegEntries is a
+// buffer of size NumPhysRegs to speed up alloc/clear for targets with large
+// reg files). Calloced memory is used for good form, and quiets tools like
+// Valgrind too, but zero initialized memory is not required by the algorithm:
+// this is because PhysRegEntries works like a SparseSet and its entries are
+// only valid when there is a corresponding CacheEntries assignment. There is
+// also support for when pass managers are reused for targets with different
+// numbers of PhysRegs: in this case PhysRegEntries is freed and reinitialized.
+void InterferenceCache::reinitPhysRegEntries() {
+ if (PhysRegEntriesCount == TRI->getNumRegs()) return;
+ free(PhysRegEntries);
+ PhysRegEntriesCount = TRI->getNumRegs();
+ PhysRegEntries = (unsigned char*)
+ calloc(PhysRegEntriesCount, sizeof(unsigned char));
+}
+
+void InterferenceCache::init(MachineFunction *mf,
+ LiveIntervalUnion *liuarray,
+ SlotIndexes *indexes,
+ LiveIntervals *lis,
+ const TargetRegisterInfo *tri) {
+ MF = mf;
+ LIUArray = liuarray;
+ TRI = tri;
+ reinitPhysRegEntries();
+ for (unsigned i = 0; i != CacheEntries; ++i)
+ Entries[i].clear(mf, indexes, lis);
+}
+
+InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
+ unsigned E = PhysRegEntries[PhysReg];
+ if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) {
+ if (!Entries[E].valid(LIUArray, TRI))
+ Entries[E].revalidate(LIUArray, TRI);
+ return &Entries[E];
+ }
+ // No valid entry exists; pick the next round-robin entry.
+ E = RoundRobin;
+ if (++RoundRobin == CacheEntries)
+ RoundRobin = 0;
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) { + if (++E == CacheEntries) + E = 0; + continue; + } + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; + } + llvm_unreachable("Ran out of interference cache entries."); +} + +/// revalidate - LIU contents have changed, update tags. +void InterferenceCache::Entry::revalidate(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + // Invalidate all block entries. + ++Tag; + // Invalidate all iterators. + PrevPos = SlotIndex(); + unsigned i = 0; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) + RegUnits[i].VirtTag = LIUArray[*Units].getTag(); +} + +void InterferenceCache::Entry::reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF) { + assert(!hasRefs() && "Cannot reset cache entry with references"); + // LIU's changed, invalidate cache. + ++Tag; + PhysReg = physReg; + Blocks.resize(MF->getNumBlockIDs()); + + // Reset iterators. + PrevPos = SlotIndex(); + RegUnits.clear(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + RegUnits.push_back(LIUArray[*Units]); + RegUnits.back().Fixed = &LIS->getRegUnit(*Units); + } +} + +bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + unsigned i = 0, e = RegUnits.size(); + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units, ++i) { + if (i == e) + return false; + if (LIUArray[*Units].changedSince(RegUnits[i].VirtTag)) + return false; + } + return i == e; +} + +void InterferenceCache::Entry::update(unsigned MBBNum) { + SlotIndex Start, Stop; + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + + // Use advanceTo only when possible. + if (PrevPos != Start) { + if (!PrevPos.isValid() || Start < PrevPos) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.find(Start); + RUI.FixedI = RUI.Fixed->find(Start); + } + } else { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + RegUnitInfo &RUI = RegUnits[i]; + RUI.VirtI.advanceTo(Start); + if (RUI.FixedI != RUI.Fixed->end()) + RUI.FixedI = RUI.Fixed->advanceTo(RUI.FixedI, Start); + } + } + PrevPos = Start; + } + + MachineFunction::const_iterator MFI = + MF->getBlockNumbered(MBBNum)->getIterator(); + BlockInterference *BI = &Blocks[MBBNum]; + ArrayRef<SlotIndex> RegMaskSlots; + ArrayRef<const uint32_t*> RegMaskBits; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference from virtregs. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + // Same thing for fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::const_iterator I = RegUnits[i].FixedI; + LiveInterval::const_iterator E = RegUnits[i].Fixed->end(); + if (I == E) + continue; + SlotIndex StartI = I->start; + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + // Also check for register mask interference. + RegMaskSlots = LIS->getRegMaskSlotsInBlock(MBBNum); + RegMaskBits = LIS->getRegMaskBitsInBlock(MBBNum); + SlotIndex Limit = BI->First.isValid() ? 
BI->First : Stop; + for (unsigned i = 0, e = RegMaskSlots.size(); + i != e && RegMaskSlots[i] < Limit; ++i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i], PhysReg)) { + // Register mask i clobbers PhysReg before the LIU interference. + BI->First = RegMaskSlots[i]; + break; + } + + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + std::tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } + + // Check for last interference in block. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveIntervalUnion::SegmentIter &I = RegUnits[i].VirtI; + if (!I.valid() || I.start() >= Stop) + continue; + I.advanceTo(Stop); + bool Backup = !I.valid() || I.start() >= Stop; + if (Backup) + --I; + SlotIndex StopI = I.stop(); + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + + // Fixed interference. + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + LiveInterval::iterator &I = RegUnits[i].FixedI; + LiveRange *LR = RegUnits[i].Fixed; + if (I == LR->end() || I->start >= Stop) + continue; + I = LR->advanceTo(I, Stop); + bool Backup = I == LR->end() || I->start >= Stop; + if (Backup) + --I; + SlotIndex StopI = I->end; + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } + + // Also check for register mask interference. + SlotIndex Limit = BI->Last.isValid() ? BI->Last : Start; + for (unsigned i = RegMaskSlots.size(); + i && RegMaskSlots[i-1].getDeadSlot() > Limit; --i) + if (MachineOperand::clobbersPhysReg(RegMaskBits[i-1], PhysReg)) { + // Register mask i-1 clobbers PhysReg after the LIU interference. + // Model the regmask clobber as a dead def. + BI->Last = RegMaskSlots[i-1].getDeadSlot(); + break; + } +} diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h new file mode 100644 index 0000000..18aa5c7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -0,0 +1,238 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference from LiveIntervalUnions, +// fixed RegUnit interference, and register masks. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_INTERFERENCECACHE_H +#define LLVM_LIB_CODEGEN_INTERFERENCECACHE_H + +#include "llvm/CodeGen/LiveIntervalUnion.h" + +namespace llvm { + +class LiveIntervals; + +class LLVM_LIBRARY_VISIBILITY InterferenceCache { + const TargetRegisterInfo *TRI; + LiveIntervalUnion *LIUArray; + MachineFunction *MF; + + /// BlockInterference - information about the interference in a single basic + /// block. + struct BlockInterference { + BlockInterference() : Tag(0) {} + unsigned Tag; + SlotIndex First; + SlotIndex Last; + }; + + /// Entry - A cache entry containing interference information for all aliases + /// of PhysReg in all basic blocks. + class Entry { + /// PhysReg - The register currently represented. + unsigned PhysReg; + + /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions + /// change. 
+ unsigned Tag;
+
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
+ /// MF - The current function.
+ MachineFunction *MF;
+
+ /// Indexes - Mapping block numbers to SlotIndex ranges.
+ SlotIndexes *Indexes;
+
+ /// LIS - Used for accessing register mask interference maps.
+ LiveIntervals *LIS;
+
+ /// PrevPos - The previous position the iterators were moved to.
+ SlotIndex PrevPos;
+
+ /// RegUnitInfo - Information tracked about each RegUnit in PhysReg.
+ /// When PrevPos is set, the iterators are valid as if advanceTo(PrevPos)
+ /// had just been called.
+ struct RegUnitInfo {
+ /// Iterator pointing into the LiveIntervalUnion containing virtual
+ /// register interference.
+ LiveIntervalUnion::SegmentIter VirtI;
+
+ /// Tag of the LIU last time we looked.
+ unsigned VirtTag;
+
+ /// Fixed interference in RegUnit.
+ LiveRange *Fixed;
+
+ /// Iterator pointing into the fixed RegUnit interference.
+ LiveInterval::iterator FixedI;
+
+ RegUnitInfo(LiveIntervalUnion &LIU)
+ : VirtTag(LIU.getTag()), Fixed(nullptr) {
+ VirtI.setMap(LIU.getMap());
+ }
+ };
+
+ /// Info for each RegUnit in PhysReg. It is very rare for a PhysReg to have
+ /// more than 4 RegUnits.
+ SmallVector<RegUnitInfo, 4> RegUnits;
+
+ /// Blocks - Interference for each block in the function.
+ SmallVector<BlockInterference, 8> Blocks;
+
+ /// update - Recompute Blocks[MBBNum]
+ void update(unsigned MBBNum);
+
+ public:
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(nullptr), LIS(nullptr) {}
+
+ void clear(MachineFunction *mf, SlotIndexes *indexes, LiveIntervals *lis) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
+ PhysReg = 0;
+ MF = mf;
+ Indexes = indexes;
+ LIS = lis;
+ }
+
+ unsigned getPhysReg() const { return PhysReg; }
+
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
+ void revalidate(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// valid - Return true if this is a valid entry for physReg.
+ bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI);
+
+ /// reset - Initialize entry to represent physReg's aliases.
+ void reset(unsigned physReg,
+ LiveIntervalUnion *LIUArray,
+ const TargetRegisterInfo *TRI,
+ const MachineFunction *MF);
+
+ /// get - Return an up to date BlockInterference.
+ BlockInterference *get(unsigned MBBNum) {
+ if (Blocks[MBBNum].Tag != Tag)
+ update(MBBNum);
+ return &Blocks[MBBNum];
+ }
+ };
+
+ // We don't keep a cache entry for every physical register; that would use too
+ // much memory. Instead, a fixed number of cache entries are used in a round-
+ // robin manner.
+ enum { CacheEntries = 32 };
+
+ // Point to an entry for each physreg. The entry pointed to may not be up to
+ // date, and it may have been reused for a different physreg.
+ unsigned char* PhysRegEntries;
+ size_t PhysRegEntriesCount;
+
+ // Next round-robin entry to be picked.
+ unsigned RoundRobin;
+
+ // The actual cache entries.
+ Entry Entries[CacheEntries];
+
+ // get - Get a valid entry for PhysReg.
+ Entry *get(unsigned PhysReg);
+
+public:
+ InterferenceCache()
+ : TRI(nullptr), LIUArray(nullptr), MF(nullptr), PhysRegEntries(nullptr),
+ PhysRegEntriesCount(0), RoundRobin(0) {}
+
+ ~InterferenceCache() {
+ free(PhysRegEntries);
+ }
+
+ void reinitPhysRegEntries();
+
+ /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, LiveIntervals*,
+ const TargetRegisterInfo *);
+
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
+ /// Cursor - The primary query interface for the block interference cache.
+ class Cursor {
+ Entry *CacheEntry;
+ const BlockInterference *Current;
+ static const BlockInterference NoInterference;
+
+ void setEntry(Entry *E) {
+ Current = nullptr;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
+ public:
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(nullptr), Current(nullptr) {}
+ ~Cursor() { setEntry(nullptr); }
+
+ Cursor(const Cursor &O) : CacheEntry(nullptr), Current(nullptr) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+ // Release reference before getting a new one. That guarantees we can
+ // actually have CacheEntries live cursors.
+ setEntry(nullptr);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
+
+ /// moveToBlock - Move cursor to basic block MBBNum.
+ void moveToBlock(unsigned MBBNum) {
+ Current = CacheEntry ? CacheEntry->get(MBBNum) : &NoInterference;
+ }
+
+ /// hasInterference - Return true if the current block has any interference.
+ bool hasInterference() {
+ return Current->First.isValid();
+ }
+
+ /// first - Return the starting index of the first interfering range in the
+ /// current block.
+ SlotIndex first() {
+ return Current->First;
+ }
+
+ /// last - Return the ending index of the last interfering range in the
+ /// current block.
+ SlotIndex last() {
+ return Current->Last;
+ }
+ };
+
+ friend class Cursor;
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
new file mode 100644
index 0000000..724f1d6
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -0,0 +1,286 @@
+//=----------------------- InterleavedAccessPass.cpp -----------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Interleaved Access pass, which identifies
+// interleaved memory accesses and transforms them into target specific
+// intrinsics.
+//
+// An interleaved load reads data from memory into several vectors,
+// DE-interleaving the data by a factor. An interleaved store RE-interleaves
+// the data from several vectors by a factor and writes it to memory.
+//
+// Because interleaved accesses are hard to identify in CodeGen (mainly because
+// the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR),
+// we identify and transform them into intrinsics in this pass, so the
+// intrinsics can easily be matched to target specific instructions later in
+// CodeGen.
+//
+// E.g. 
An interleaved load (Factor = 2): +// %wide.vec = load <8 x i32>, <8 x i32>* %ptr +// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6> +// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7> +// +// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2 +// intrinsic in ARM backend. +// +// E.g. An interleaved store (Factor = 3): +// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, +// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> +// store <12 x i32> %i.vec, <12 x i32>* %ptr +// +// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3 +// intrinsic in ARM backend. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "interleaved-access" + +static cl::opt<bool> LowerInterleavedAccesses( + "lower-interleaved-accesses", + cl::desc("Enable lowering interleaved accesses to intrinsics"), + cl::init(true), cl::Hidden); + +static unsigned MaxFactor; // The maximum supported interleave factor. + +namespace llvm { +static void initializeInterleavedAccessPass(PassRegistry &); +} + +namespace { + +class InterleavedAccess : public FunctionPass { + +public: + static char ID; + InterleavedAccess(const TargetMachine *TM = nullptr) + : FunctionPass(ID), TM(TM), TLI(nullptr) { + initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { return "Interleaved Access Pass"; } + + bool runOnFunction(Function &F) override; + +private: + const TargetMachine *TM; + const TargetLowering *TLI; + + /// \brief Transform an interleaved load into target specific intrinsics. + bool lowerInterleavedLoad(LoadInst *LI, + SmallVector<Instruction *, 32> &DeadInsts); + + /// \brief Transform an interleaved store into target specific intrinsics. + bool lowerInterleavedStore(StoreInst *SI, + SmallVector<Instruction *, 32> &DeadInsts); +}; +} // end anonymous namespace. + +char InterleavedAccess::ID = 0; +INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access", + "Lower interleaved memory accesses to target specific intrinsics", + false, false) + +FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) { + return new InterleavedAccess(TM); +} + +/// \brief Check if the mask is a DE-interleave mask of the given factor +/// \p Factor like: +/// <Index, Index+Factor, ..., Index+(NumElts-1)*Factor> +static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor, + unsigned &Index) { + // Check all potential start indices from 0 to (Factor - 1). + for (Index = 0; Index < Factor; Index++) { + unsigned i = 0; + + // Check that elements are in ascending order by Factor. Ignore undef + // elements. + for (; i < Mask.size(); i++) + if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor) + break; + + if (i == Mask.size()) + return true; + } + + return false; +} + +/// \brief Check if the mask is a DE-interleave mask for an interleaved load. +/// +/// E.g. 
DE-interleave masks (Factor = 2) could be:
+/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
+/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
+static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+ unsigned &Index) {
+ if (Mask.size() < 2)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++)
+ if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
+ return true;
+
+ return false;
+}
+
+/// \brief Check if the mask is a RE-interleave mask for an interleaved store.
+///
+/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+///
+/// E.g. The RE-interleave mask (Factor = 2) could be:
+/// <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
+ unsigned NumElts = Mask.size();
+ if (NumElts < 4)
+ return false;
+
+ // Check potential Factors.
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ if (NumElts % Factor)
+ continue;
+
+ unsigned NumSubElts = NumElts / Factor;
+ if (!isPowerOf2_32(NumSubElts))
+ continue;
+
+ // Check whether each element matches the RE-interleave rule. Ignore undef
+ // elements.
+ unsigned i = 0;
+ for (; i < NumElts; i++)
+ if (Mask[i] >= 0 &&
+ static_cast<unsigned>(Mask[i]) !=
+ (i % Factor) * NumSubElts + i / Factor)
+ break;
+
+ // Found a RE-interleave mask of the current factor.
+ if (i == NumElts)
+ return true;
+ }
+
+ return false;
+}
+
+bool InterleavedAccess::lowerInterleavedLoad(
+ LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!LI->isSimple())
+ return false;
+
+ SmallVector<ShuffleVectorInst *, 4> Shuffles;
+
+ // Check if all users of this load are shufflevectors.
+ for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
+ if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
+ return false;
+
+ Shuffles.push_back(SVI);
+ }
+
+ if (Shuffles.empty())
+ return false;
+
+ unsigned Factor, Index;
+
+ // Check if the first shufflevector is a DE-interleave shuffle.
+ if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
+ return false;
+
+ // Holds the corresponding index for each DE-interleave shuffle.
+ SmallVector<unsigned, 4> Indices;
+ Indices.push_back(Index);
+
+ Type *VecTy = Shuffles[0]->getType();
+
+ // Check if the other shufflevectors are also DE-interleave shuffles of the
+ // same type and factor as the first shufflevector.
+ for (unsigned i = 1; i < Shuffles.size(); i++) {
+ if (Shuffles[i]->getType() != VecTy)
+ return false;
+
+ if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
+ Index))
+ return false;
+
+ Indices.push_back(Index);
+ }
+
+ DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
+
+ // Try to create target specific intrinsics to replace the load and shuffles.
+ if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
+ return false;
+
+ for (auto SVI : Shuffles)
+ DeadInsts.push_back(SVI);
+
+ DeadInsts.push_back(LI);
+ return true;
+}
+
+bool InterleavedAccess::lowerInterleavedStore(
+ StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
+ if (!SI->isSimple())
+ return false;
+
+ ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
+ if (!SVI || !SVI->hasOneUse())
+ return false;
+
+ // Check if the shufflevector is a RE-interleave shuffle.
+ unsigned Factor; + if (!isReInterleaveMask(SVI->getShuffleMask(), Factor)) + return false; + + DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); + + // Try to create target specific intrinsics to replace the store and shuffle. + if (!TLI->lowerInterleavedStore(SI, SVI, Factor)) + return false; + + // Already have a new target specific interleaved store. Erase the old store. + DeadInsts.push_back(SI); + DeadInsts.push_back(SVI); + return true; +} + +bool InterleavedAccess::runOnFunction(Function &F) { + if (!TM || !LowerInterleavedAccesses) + return false; + + DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); + + TLI = TM->getSubtargetImpl(F)->getTargetLowering(); + MaxFactor = TLI->getMaxSupportedInterleaveFactor(); + + // Holds dead instructions that will be erased later. + SmallVector<Instruction *, 32> DeadInsts; + bool Changed = false; + + for (auto &I : instructions(F)) { + if (LoadInst *LI = dyn_cast<LoadInst>(&I)) + Changed |= lowerInterleavedLoad(LI, DeadInsts); + + if (StoreInst *SI = dyn_cast<StoreInst>(&I)) + Changed |= lowerInterleavedStore(SI, DeadInsts); + } + + for (auto I : DeadInsts) + I->eraseFromParent(); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp new file mode 100644 index 0000000..2962f87 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -0,0 +1,608 @@ +//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the IntrinsicLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +template <class ArgIt> +static void EnsureFunctionExists(Module &M, const char *Name, + ArgIt ArgBegin, ArgIt ArgEnd, + Type *RetTy) { + // Insert a correctly-typed definition now. + std::vector<Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back(I->getType()); + M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false)); +} + +static void EnsureFPIntrinsicsExist(Module &M, Function &Fn, + const char *FName, + const char *DName, const char *LDName) { + // Insert definitions for all the floating point types. + switch((int)Fn.arg_begin()->getType()->getTypeID()) { + case Type::FloatTyID: + EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(), + Type::getFloatTy(M.getContext())); + break; + case Type::DoubleTyID: + EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(), + Type::getDoubleTy(M.getContext())); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(), + Fn.arg_begin()->getType()); + break; + } +} + +/// ReplaceCallWith - This function is used when we want to lower an intrinsic +/// call to a call of an external function. 
This handles hard cases such as +/// when there was already a prototype for the external function, and if that +/// prototype doesn't match the arguments we expect to pass in. +template <class ArgIt> +static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI, + ArgIt ArgBegin, ArgIt ArgEnd, + Type *RetTy) { + // If we haven't already looked up this function, check to see if the + // program already contains a function with this name. + Module *M = CI->getModule(); + // Get or insert the definition now. + std::vector<Type *> ParamTys; + for (ArgIt I = ArgBegin; I != ArgEnd; ++I) + ParamTys.push_back((*I)->getType()); + Constant* FCache = M->getOrInsertFunction(NewFn, + FunctionType::get(RetTy, ParamTys, false)); + + IRBuilder<> Builder(CI->getParent(), CI->getIterator()); + SmallVector<Value *, 8> Args(ArgBegin, ArgEnd); + CallInst *NewCI = Builder.CreateCall(FCache, Args); + NewCI->setName(CI->getName()); + if (!CI->use_empty()) + CI->replaceAllUsesWith(NewCI); + return NewCI; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + +void IntrinsicLowering::AddPrototypes(Module &M) { + LLVMContext &Context = M.getContext(); + for (auto &F : M) + if (F.isDeclaration() && !F.use_empty()) + switch (F.getIntrinsicID()) { + default: break; + case Intrinsic::setjmp: + EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(), + Type::getInt32Ty(M.getContext())); + break; + case Intrinsic::longjmp: + EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::siglongjmp: + EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(), + Type::getVoidTy(M.getContext())); + break; + case Intrinsic::memcpy: + M.getOrInsertFunction("memcpy", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + DL.getIntPtrType(Context), nullptr); + break; + case Intrinsic::memmove: + M.getOrInsertFunction("memmove", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + DL.getIntPtrType(Context), nullptr); + break; + case Intrinsic::memset: + M.getOrInsertFunction("memset", + Type::getInt8PtrTy(Context), + Type::getInt8PtrTy(Context), + Type::getInt32Ty(M.getContext()), + DL.getIntPtrType(Context), nullptr); + break; + case Intrinsic::sqrt: + EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl"); + break; + case Intrinsic::sin: + EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl"); + break; + case Intrinsic::cos: + EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl"); + break; + case Intrinsic::pow: + EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl"); + break; + case Intrinsic::log: + EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl"); + break; + case Intrinsic::log2: + EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l"); + break; + case Intrinsic::log10: + EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l"); + break; + case Intrinsic::exp: + EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl"); + break; + case Intrinsic::exp2: + EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l"); + break; + } +} + +/// LowerBSWAP - Emit the code to lower bswap of V before the specified +/// instruction IP. 
+static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't bswap a non-integer type!"); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + + IRBuilder<> Builder(IP); + + switch(BitSize) { + default: llvm_unreachable("Unhandled type size of value to byteswap!"); + case 16: { + Value *Tmp1 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.1"); + V = Builder.CreateOr(Tmp1, Tmp2, "bswap.i16"); + break; + } + case 32: { + Value *Tmp4 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.4"); + Value *Tmp3 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.3"); + Value *Tmp2 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.2"); + Value *Tmp1 = Builder.CreateLShr(V,ConstantInt::get(V->getType(), 24), + "bswap.1"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF0000), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt32Ty(Context), 0xFF00), + "bswap.and2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or1"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or2"); + V = Builder.CreateOr(Tmp4, Tmp2, "bswap.i32"); + break; + } + case 64: { + Value *Tmp8 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 56), + "bswap.8"); + Value *Tmp7 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 40), + "bswap.7"); + Value *Tmp6 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 24), + "bswap.6"); + Value *Tmp5 = Builder.CreateShl(V, ConstantInt::get(V->getType(), 8), + "bswap.5"); + Value* Tmp4 = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 8), + "bswap.4"); + Value* Tmp3 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 24), + "bswap.3"); + Value* Tmp2 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 40), + "bswap.2"); + Value* Tmp1 = Builder.CreateLShr(V, + ConstantInt::get(V->getType(), 56), + "bswap.1"); + Tmp7 = Builder.CreateAnd(Tmp7, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000000000ULL), + "bswap.and7"); + Tmp6 = Builder.CreateAnd(Tmp6, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000000000ULL), + "bswap.and6"); + Tmp5 = Builder.CreateAnd(Tmp5, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00000000ULL), + "bswap.and5"); + Tmp4 = Builder.CreateAnd(Tmp4, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF000000ULL), + "bswap.and4"); + Tmp3 = Builder.CreateAnd(Tmp3, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF0000ULL), + "bswap.and3"); + Tmp2 = Builder.CreateAnd(Tmp2, + ConstantInt::get(Type::getInt64Ty(Context), + 0xFF00ULL), + "bswap.and2"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp7, "bswap.or1"); + Tmp6 = Builder.CreateOr(Tmp6, Tmp5, "bswap.or2"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp3, "bswap.or3"); + Tmp2 = Builder.CreateOr(Tmp2, Tmp1, "bswap.or4"); + Tmp8 = Builder.CreateOr(Tmp8, Tmp6, "bswap.or5"); + Tmp4 = Builder.CreateOr(Tmp4, Tmp2, "bswap.or6"); + V = Builder.CreateOr(Tmp8, Tmp4, "bswap.i64"); + break; + } + } + return V; +} + +/// LowerCTPOP - Emit the code to lower ctpop of V before the specified +/// instruction IP. 
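+///
+/// For illustration, this is the classic parallel bit count: 1-bit fields are
+/// summed into 2-bit fields, then 4-bit fields, and so on, using the masks in
+/// MaskValues. For an i32 input %x the first step is roughly (value names are
+/// illustrative):
+///   %sh   = lshr i32 %x, 1
+///   %even = and i32 %x, 1431655765     ; 0x55555555
+///   %odd  = and i32 %sh, 1431655765
+///   %sum  = add i32 %even, %odd
+/// Values wider than 64 bits are processed one 64-bit word at a time.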
+static Value *LowerCTPOP(LLVMContext &Context, Value *V, Instruction *IP) { + assert(V->getType()->isIntegerTy() && "Can't ctpop a non-integer type!"); + + static const uint64_t MaskValues[6] = { + 0x5555555555555555ULL, 0x3333333333333333ULL, + 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL, + 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL + }; + + IRBuilder<> Builder(IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + unsigned WordSize = (BitSize + 63) / 64; + Value *Count = ConstantInt::get(V->getType(), 0); + + for (unsigned n = 0; n < WordSize; ++n) { + Value *PartValue = V; + for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize); + i <<= 1, ++ct) { + Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]); + Value *LHS = Builder.CreateAnd(PartValue, MaskCst, "cppop.and1"); + Value *VShift = Builder.CreateLShr(PartValue, + ConstantInt::get(V->getType(), i), + "ctpop.sh"); + Value *RHS = Builder.CreateAnd(VShift, MaskCst, "cppop.and2"); + PartValue = Builder.CreateAdd(LHS, RHS, "ctpop.step"); + } + Count = Builder.CreateAdd(PartValue, Count, "ctpop.part"); + if (BitSize > 64) { + V = Builder.CreateLShr(V, ConstantInt::get(V->getType(), 64), + "ctpop.part.sh"); + BitSize -= 64; + } + } + + return Count; +} + +/// LowerCTLZ - Emit the code to lower ctlz of V before the specified +/// instruction IP. +static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { + + IRBuilder<> Builder(IP); + + unsigned BitSize = V->getType()->getPrimitiveSizeInBits(); + for (unsigned i = 1; i < BitSize; i <<= 1) { + Value *ShVal = ConstantInt::get(V->getType(), i); + ShVal = Builder.CreateLShr(V, ShVal, "ctlz.sh"); + V = Builder.CreateOr(V, ShVal, "ctlz.step"); + } + + V = Builder.CreateNot(V); + return LowerCTPOP(Context, V, IP); +} + +static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, + const char *Dname, + const char *LDname) { + CallSite CS(CI); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { + default: llvm_unreachable("Invalid type in intrinsic"); + case Type::FloatTyID: + ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), + Type::getFloatTy(CI->getContext())); + break; + case Type::DoubleTyID: + ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), + Type::getDoubleTy(CI->getContext())); + break; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), + CI->getArgOperand(0)->getType()); + break; + } +} + +void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { + IRBuilder<> Builder(CI); + LLVMContext &Context = CI->getContext(); + + const Function *Callee = CI->getCalledFunction(); + assert(Callee && "Cannot lower an indirect call!"); + + CallSite CS(CI); + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ + Callee->getName() + "'!"); + default: + report_fatal_error("Code generator does not support intrinsic function '"+ + Callee->getName()+"'!"); + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + Value *V = CI->getArgOperand(0); + CI->replaceAllUsesWith(V); + break; + } + + // The setjmp/longjmp intrinsics should only exist in the code if it was + // never optimized (ie, right out of the CFE), or if it has been hacked on + // by the lowerinvoke pass. In both cases, the right thing to do is to + // convert the call to an explicit setjmp or longjmp call. 
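+  // For example (illustrative, not from the original source), a call such as
+  //   %r = call i32 @llvm.setjmp(i8* %buf)
+  // is rewritten by ReplaceCallWith into an ordinary libc call:
+  //   %r = call i32 @setjmp(i8* %buf)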
+ case Intrinsic::setjmp: { + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getInt32Ty(Context)); + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(V); + break; + } + case Intrinsic::sigsetjmp: + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + + case Intrinsic::longjmp: { + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + + case Intrinsic::siglongjmp: { + // Insert the call to abort + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), + Type::getVoidTy(Context)); + break; + } + case Intrinsic::ctpop: + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::bswap: + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::ctlz: + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); + break; + + case Intrinsic::cttz: { + // cttz(x) -> ctpop(~X & (X-1)) + Value *Src = CI->getArgOperand(0); + Value *NotSrc = Builder.CreateNot(Src); + NotSrc->setName(Src->getName() + ".not"); + Value *SrcM1 = ConstantInt::get(Src->getType(), 1); + SrcM1 = Builder.CreateSub(Src, SrcM1); + Src = LowerCTPOP(Context, Builder.CreateAnd(NotSrc, SrcM1), CI); + CI->replaceAllUsesWith(Src); + break; + } + + case Intrinsic::stacksave: + case Intrinsic::stackrestore: { + if (!Warned) + errs() << "WARNING: this target does not support the llvm.stack" + << (Callee->getIntrinsicID() == Intrinsic::stacksave ? + "save" : "restore") << " intrinsic.\n"; + Warned = true; + if (Callee->getIntrinsicID() == Intrinsic::stacksave) + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + break; + } + + case Intrinsic::get_dynamic_area_offset: + errs() << "WARNING: this target does not support the custom llvm.get." + "dynamic.area.offset. It is being lowered to a constant 0\n"; + // Just lower it to a constant 0 because for most targets + // @llvm.get.dynamic.area.offset is lowered to zero. + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 0)); + break; + case Intrinsic::returnaddress: + case Intrinsic::frameaddress: + errs() << "WARNING: this target does not support the llvm." + << (Callee->getIntrinsicID() == Intrinsic::returnaddress ? + "return" : "frame") << "address intrinsic.\n"; + CI->replaceAllUsesWith(ConstantPointerNull::get( + cast<PointerType>(CI->getType()))); + break; + + case Intrinsic::prefetch: + break; // Simply strip out prefetches on unsupported architectures + + case Intrinsic::pcmarker: + break; // Simply strip out pcmarker on unsupported architectures + case Intrinsic::readcyclecounter: { + errs() << "WARNING: this target does not support the llvm.readcyclecoun" + << "ter intrinsic. It is being lowered to a constant 0\n"; + CI->replaceAllUsesWith(ConstantInt::get(Type::getInt64Ty(Context), 0)); + break; + } + + case Intrinsic::dbg_declare: + break; // Simply strip out debugging intrinsics + + case Intrinsic::eh_typeid_for: + // Return something different to eh_selector. 
+ CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Just drop the annotation, but forward the value + CI->replaceAllUsesWith(CI->getOperand(0)); + break; + + case Intrinsic::assume: + case Intrinsic::var_annotation: + break; // Strip out these intrinsics + + case Intrinsic::memcpy: { + Type *IntPtr = DL.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memmove: { + Type *IntPtr = DL.getIntPtrType(Context); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); + Ops[2] = Size; + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::memset: { + Value *Op0 = CI->getArgOperand(0); + Type *IntPtr = DL.getIntPtrType(Op0->getType()); + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, + /* isSigned */ false); + Value *Ops[3]; + Ops[0] = Op0; + // Extend the amount to i32. + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), + Type::getInt32Ty(Context), + /* isSigned */ false); + Ops[2] = Size; + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); + break; + } + case Intrinsic::sqrt: { + ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl"); + break; + } + case Intrinsic::log: { + ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl"); + break; + } + case Intrinsic::log2: { + ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l"); + break; + } + case Intrinsic::log10: { + ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l"); + break; + } + case Intrinsic::exp: { + ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl"); + break; + } + case Intrinsic::exp2: { + ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l"); + break; + } + case Intrinsic::pow: { + ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl"); + break; + } + case Intrinsic::sin: { + ReplaceFPIntrinsicWithCall(CI, "sinf", "sin", "sinl"); + break; + } + case Intrinsic::cos: { + ReplaceFPIntrinsicWithCall(CI, "cosf", "cos", "cosl"); + break; + } + case Intrinsic::floor: { + ReplaceFPIntrinsicWithCall(CI, "floorf", "floor", "floorl"); + break; + } + case Intrinsic::ceil: { + ReplaceFPIntrinsicWithCall(CI, "ceilf", "ceil", "ceill"); + break; + } + case Intrinsic::trunc: { + ReplaceFPIntrinsicWithCall(CI, "truncf", "trunc", "truncl"); + break; + } + case Intrinsic::round: { + ReplaceFPIntrinsicWithCall(CI, "roundf", "round", "roundl"); + break; + } + case Intrinsic::copysign: { + ReplaceFPIntrinsicWithCall(CI, "copysignf", "copysign", "copysignl"); + break; + } + case Intrinsic::flt_rounds: + // Lower to "round to the nearest" + if (!CI->getType()->isVoidTy()) + CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1)); + break; + case Intrinsic::invariant_start: + case Intrinsic::lifetime_start: + // Discard region information. + CI->replaceAllUsesWith(UndefValue::get(CI->getType())); + break; + case Intrinsic::invariant_end: + case Intrinsic::lifetime_end: + // Discard region information. 
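+    // (Illustrative note, not from the original source: these intrinsics
+    // return void, so there are no uses to rewrite and the common
+    // CI->eraseFromParent() after the switch is enough to drop them.)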
+ break; + } + + assert(CI->use_empty() && + "Lowering should have eliminated any uses of the intrinsic call!"); + CI->eraseFromParent(); +} + +bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) { + // Verify this is a simple bswap. + if (CI->getNumArgOperands() != 1 || + CI->getType() != CI->getArgOperand(0)->getType() || + !CI->getType()->isIntegerTy()) + return false; + + IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); + if (!Ty) + return false; + + // Okay, we can do this xform, do so now. + Module *M = CI->getModule(); + Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty); + + Value *Op = CI->getArgOperand(0); + Op = CallInst::Create(Int, Op, CI->getName(), CI); + + CI->replaceAllUsesWith(Op); + CI->eraseFromParent(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp new file mode 100644 index 0000000..1c27377 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -0,0 +1,271 @@ +//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LLVMTargetMachine class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetMachine.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/BasicTTIImpl.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Transforms/Scalar.h" +using namespace llvm; + +// Enable or disable FastISel. Both options are needed, because +// FastISel is enabled by default with -fast, and we wish to be +// able to enable or disable fast-isel independently from -O0. +static cl::opt<cl::boolOrDefault> +EnableFastISelOption("fast-isel", cl::Hidden, + cl::desc("Enable the \"fast\" instruction selector")); + +void LLVMTargetMachine::initAsmInfo() { + MRI = TheTarget.createMCRegInfo(getTargetTriple().str()); + MII = TheTarget.createMCInstrInfo(); + // FIXME: Having an MCSubtargetInfo on the target machine is a hack due + // to some backends having subtarget feature dependent module level + // code generation. This is similar to the hack in the AsmPrinter for + // module level assembly etc. + STI = TheTarget.createMCSubtargetInfo(getTargetTriple().str(), getTargetCPU(), + getTargetFeatureString()); + + MCAsmInfo *TmpAsmInfo = + TheTarget.createMCAsmInfo(*MRI, getTargetTriple().str()); + // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, + // and if the old one gets included then MCAsmInfo will be NULL and + // we'll crash later. + // Provide the user with a useful error message about what's wrong. 
+ assert(TmpAsmInfo && "MCAsmInfo not initialized. " + "Make sure you include the correct TargetSelect.h" + "and that InitializeAllTargetMCs() is being invoked!"); + + if (Options.DisableIntegratedAS) + TmpAsmInfo->setUseIntegratedAssembler(false); + + if (Options.CompressDebugSections) + TmpAsmInfo->setCompressDebugSections(true); + + AsmInfo = TmpAsmInfo; +} + +LLVMTargetMachine::LLVMTargetMachine(const Target &T, + StringRef DataLayoutString, + const Triple &TT, StringRef CPU, + StringRef FS, TargetOptions Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : TargetMachine(T, DataLayoutString, TT, CPU, FS, Options) { + CodeGenInfo = T.createMCCodeGenInfo(TT.str(), RM, CM, OL); +} + +TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() { + return TargetIRAnalysis([this](const Function &F) { + return TargetTransformInfo(BasicTTIImpl(this, F)); + }); +} + +/// addPassesToX helper drives creation and initialization of TargetPassConfig. +static MCContext * +addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, + bool DisableVerify, AnalysisID StartBefore, + AnalysisID StartAfter, AnalysisID StopAfter, + MachineFunctionInitializer *MFInitializer = nullptr) { + + // Add internal analysis passes from the target machine. + PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + + // Targets may override createPassConfig to provide a target-specific + // subclass. + TargetPassConfig *PassConfig = TM->createPassConfig(PM); + PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter); + + // Set PassConfig options provided by TargetMachine. + PassConfig->setDisableVerify(DisableVerify); + + PM.add(PassConfig); + + PassConfig->addIRPasses(); + + PassConfig->addCodeGenPrepare(); + + PassConfig->addPassesToHandleExceptions(); + + PassConfig->addISelPrepare(); + + // Install a MachineModuleInfo class, which is an immutable pass that holds + // all the per-module stuff we're generating, including MCContext. + MachineModuleInfo *MMI = new MachineModuleInfo( + *TM->getMCAsmInfo(), *TM->getMCRegisterInfo(), TM->getObjFileLowering()); + PM.add(MMI); + + // Set up a MachineFunction for the rest of CodeGen to work on. + PM.add(new MachineFunctionAnalysis(*TM, MFInitializer)); + + // Enable FastISel with -fast, but allow that to be overridden. + TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE); + if (EnableFastISelOption == cl::BOU_TRUE || + (TM->getOptLevel() == CodeGenOpt::None && + TM->getO0WantsFastISel())) + TM->setFastISel(true); + + // Ask the target for an isel. + if (PassConfig->addInstSelector()) + return nullptr; + + PassConfig->addMachinePasses(); + + PassConfig->setInitialized(); + + return &MMI->getContext(); +} + +bool LLVMTargetMachine::addPassesToEmitFile( + PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType, + bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter, + AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) { + // Add common CodeGen passes. 
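+  // Illustrative usage sketch (not from the original source; names assumed):
+  // a typical client drives this with a legacy pass manager, e.g.
+  //   legacy::PassManager PM;
+  //   if (TM->addPassesToEmitFile(PM, OutStream,
+  //                               TargetMachine::CGFT_ObjectFile,
+  //                               /*DisableVerify=*/true))
+  //     report_fatal_error("target does not support object file emission");
+  //   PM.run(*M);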
+ MCContext *Context = + addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter, + StopAfter, MFInitializer); + if (!Context) + return true; + + if (StopAfter) { + PM.add(createPrintMIRPass(outs())); + return false; + } + + if (Options.MCOptions.MCSaveTempLabels) + Context->setAllowTemporaryLabels(false); + + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); + const MCAsmInfo &MAI = *getMCAsmInfo(); + const MCRegisterInfo &MRI = *getMCRegisterInfo(); + const MCInstrInfo &MII = *getMCInstrInfo(); + + std::unique_ptr<MCStreamer> AsmStreamer; + + switch (FileType) { + case CGFT_AssemblyFile: { + MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter( + getTargetTriple(), MAI.getAssemblerDialect(), MAI, MII, MRI); + + // Create a code emitter if asked to show the encoding. + MCCodeEmitter *MCE = nullptr; + if (Options.MCOptions.ShowMCEncoding) + MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + + MCAsmBackend *MAB = + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + auto FOut = llvm::make_unique<formatted_raw_ostream>(Out); + MCStreamer *S = getTarget().createAsmStreamer( + *Context, std::move(FOut), Options.MCOptions.AsmVerbose, + Options.MCOptions.MCUseDwarfDirectory, InstPrinter, MCE, MAB, + Options.MCOptions.ShowMCInst); + AsmStreamer.reset(S); + break; + } + case CGFT_ObjectFile: { + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context); + MCAsmBackend *MAB = + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + if (!MCE || !MAB) + return true; + + // Don't waste memory on names of temp labels. + Context->setUseNamesOnTempLabels(false); + + Triple T(getTargetTriple().str()); + AsmStreamer.reset(getTarget().createMCObjectStreamer( + T, *Context, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ true)); + break; + } + case CGFT_Null: + // The Null output is intended for use for performance analysis and testing, + // not real users. + AsmStreamer.reset(getTarget().createNullStreamer(*Context)); + break; + } + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); + if (!Printer) + return true; + + PM.add(Printer); + + return false; +} + +/// addPassesToEmitMC - Add passes to the specified pass manager to get +/// machine code emitted with the MCJIT. This method returns true if machine +/// code is not supported. It fills the MCContext Ctx pointer which can be +/// used to build custom MCStreamer. +/// +bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_pwrite_stream &Out, + bool DisableVerify) { + // Add common CodeGen passes. + Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr, + nullptr); + if (!Ctx) + return true; + + if (Options.MCOptions.MCSaveTempLabels) + Ctx->setAllowTemporaryLabels(false); + + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. 
+ const MCRegisterInfo &MRI = *getMCRegisterInfo(); + MCCodeEmitter *MCE = + getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx); + MCAsmBackend *MAB = + getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU); + if (!MCE || !MAB) + return true; + + const Triple &T = getTargetTriple(); + const MCSubtargetInfo &STI = *getMCSubtargetInfo(); + std::unique_ptr<MCStreamer> AsmStreamer(getTarget().createMCObjectStreamer( + T, *Ctx, *MAB, Out, MCE, STI, Options.MCOptions.MCRelaxAll, + Options.MCOptions.MCIncrementalLinkerCompatible, + /*DWARFMustBeAtTheEnd*/ true)); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = + getTarget().createAsmPrinter(*this, std::move(AsmStreamer)); + if (!Printer) + return true; + + PM.add(Printer); + + return false; // success! +} diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp new file mode 100644 index 0000000..4321849 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp @@ -0,0 +1,140 @@ +//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LatencyPriorityQueue class, which is a +// SchedulingPriorityQueue that schedules using latency information to +// reduce the length of the critical path through the basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "scheduler" + +bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. + return RHSNum < LHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = nullptr; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. 
If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return nullptr; + OnlyAvailablePred = &Pred; + } + } + + return OnlyAvailablePred; +} + +void LatencyPriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + } + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + + Queue.push_back(SU); +} + + +// scheduledNode - As nodes are scheduled, we look to see if there are any +// successor nodes that have a single unscheduled predecessor. If so, that +// single predecessor has a higher priority, since scheduling it will make +// the node available. +void LatencyPriorityQueue::scheduledNode(SUnit *SU) { + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + AdjustPriorityOfUnscheduledPreds(I->getSUnit()); + } +} + +/// AdjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + +SUnit *LatencyPriorityQueue::pop() { + if (empty()) return nullptr; + std::vector<SUnit *>::iterator Best = Queue.begin(); + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != std::prev(Queue.end())) + std::swap(*Best, Queue.back()); + Queue.pop_back(); + return V; +} + +void LatencyPriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != std::prev(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); +} diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp new file mode 100644 index 0000000..be61a20 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -0,0 +1,333 @@ +//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements LexicalScopes analysis. 
+// +// This pass collects lexical scope information and maps machine instructions +// to respective lexical scopes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +using namespace llvm; + +#define DEBUG_TYPE "lexicalscopes" + +/// reset - Reset the instance so that it's prepared for another function. +void LexicalScopes::reset() { + MF = nullptr; + CurrentFnLexicalScope = nullptr; + LexicalScopeMap.clear(); + AbstractScopeMap.clear(); + InlinedLexicalScopeMap.clear(); + AbstractScopesList.clear(); +} + +/// initialize - Scan machine function and constuct lexical scope nest. +void LexicalScopes::initialize(const MachineFunction &Fn) { + reset(); + MF = &Fn; + SmallVector<InsnRange, 4> MIRanges; + DenseMap<const MachineInstr *, LexicalScope *> MI2ScopeMap; + extractLexicalScopes(MIRanges, MI2ScopeMap); + if (CurrentFnLexicalScope) { + constructScopeNest(CurrentFnLexicalScope); + assignInstructionRanges(MIRanges, MI2ScopeMap); + } +} + +/// extractLexicalScopes - Extract instruction ranges for each lexical scopes +/// for the given machine function. +void LexicalScopes::extractLexicalScopes( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { + + // Scan each instruction and create scopes. First build working set of scopes. + for (const auto &MBB : *MF) { + const MachineInstr *RangeBeginMI = nullptr; + const MachineInstr *PrevMI = nullptr; + const DILocation *PrevDL = nullptr; + for (const auto &MInsn : MBB) { + // Check if instruction has valid location information. + const DILocation *MIDL = MInsn.getDebugLoc(); + if (!MIDL) { + PrevMI = &MInsn; + continue; + } + + // If scope has not changed then skip this instruction. + if (MIDL == PrevDL) { + PrevMI = &MInsn; + continue; + } + + // Ignore DBG_VALUE. It does not contribute to any instruction in output. + if (MInsn.isDebugValue()) + continue; + + if (RangeBeginMI) { + // If we have already seen a beginning of an instruction range and + // current instruction scope does not match scope of first instruction + // in this range then create a new instruction range. + InsnRange R(RangeBeginMI, PrevMI); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + MIRanges.push_back(R); + } + + // This is a beginning of a new instruction range. + RangeBeginMI = &MInsn; + + // Reset previous markers. + PrevMI = &MInsn; + PrevDL = MIDL; + } + + // Create last instruction range. + if (RangeBeginMI && PrevMI && PrevDL) { + InsnRange R(RangeBeginMI, PrevMI); + MIRanges.push_back(R); + MI2ScopeMap[RangeBeginMI] = getOrCreateLexicalScope(PrevDL); + } + } +} + +/// findLexicalScope - Find lexical scope, either regular or inlined, for the +/// given DebugLoc. Return NULL if not found. +LexicalScope *LexicalScopes::findLexicalScope(const DILocation *DL) { + DILocalScope *Scope = DL->getScope(); + if (!Scope) + return nullptr; + + // The scope that we were created with could have an extra file - which + // isn't what we care about in this case. 
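+  // (Illustrative note, not from the original source: a DILexicalBlockFile is
+  // the scope variant introduced by a change of file, e.g. code pulled in via
+  // an #include or #line directive, so we look through it to the underlying
+  // lexical block.  Roughly, the metadata shape being handled is
+  //   !1 = distinct !DILexicalBlock(scope: !0, file: !2, line: 10)
+  //   !3 = !DILexicalBlockFile(scope: !1, file: !4, discriminator: 0)
+  // and the lookup below keys on !1 rather than !3.)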
+ if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); + + if (auto *IA = DL->getInlinedAt()) { + auto I = InlinedLexicalScopeMap.find(std::make_pair(Scope, IA)); + return I != InlinedLexicalScopeMap.end() ? &I->second : nullptr; + } + return findLexicalScope(Scope); +} + +/// getOrCreateLexicalScope - Find lexical scope for the given DebugLoc. If +/// not available then create new lexical scope. +LexicalScope *LexicalScopes::getOrCreateLexicalScope(const DILocalScope *Scope, + const DILocation *IA) { + if (IA) { + // Create an abstract scope for inlined function. + getOrCreateAbstractScope(Scope); + // Create an inlined scope for inlined function. + return getOrCreateInlinedScope(Scope, IA); + } + + return getOrCreateRegularScope(Scope); +} + +/// getOrCreateRegularScope - Find or create a regular lexical scope. +LexicalScope * +LexicalScopes::getOrCreateRegularScope(const DILocalScope *Scope) { + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); + + auto I = LexicalScopeMap.find(Scope); + if (I != LexicalScopeMap.end()) + return &I->second; + + // FIXME: Should the following dyn_cast be DILexicalBlock? + LexicalScope *Parent = nullptr; + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateLexicalScope(Block->getScope()); + I = LexicalScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, nullptr, + false)).first; + + if (!Parent) { + assert(cast<DISubprogram>(Scope)->describes(MF->getFunction())); + assert(!CurrentFnLexicalScope); + CurrentFnLexicalScope = &I->second; + } + + return &I->second; +} + +/// getOrCreateInlinedScope - Find or create an inlined lexical scope. +LexicalScope * +LexicalScopes::getOrCreateInlinedScope(const DILocalScope *Scope, + const DILocation *InlinedAt) { + std::pair<const DILocalScope *, const DILocation *> P(Scope, InlinedAt); + auto I = InlinedLexicalScopeMap.find(P); + if (I != InlinedLexicalScopeMap.end()) + return &I->second; + + LexicalScope *Parent; + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateInlinedScope(Block->getScope(), InlinedAt); + else + Parent = getOrCreateLexicalScope(InlinedAt); + + I = InlinedLexicalScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(P), + std::forward_as_tuple(Parent, Scope, + InlinedAt, false)) + .first; + return &I->second; +} + +/// getOrCreateAbstractScope - Find or create an abstract lexical scope. +LexicalScope * +LexicalScopes::getOrCreateAbstractScope(const DILocalScope *Scope) { + assert(Scope && "Invalid Scope encoding!"); + + if (auto *File = dyn_cast<DILexicalBlockFile>(Scope)) + Scope = File->getScope(); + auto I = AbstractScopeMap.find(Scope); + if (I != AbstractScopeMap.end()) + return &I->second; + + // FIXME: Should the following isa be DILexicalBlock? 
+ LexicalScope *Parent = nullptr; + if (auto *Block = dyn_cast<DILexicalBlockBase>(Scope)) + Parent = getOrCreateAbstractScope(Block->getScope()); + + I = AbstractScopeMap.emplace(std::piecewise_construct, + std::forward_as_tuple(Scope), + std::forward_as_tuple(Parent, Scope, + nullptr, true)).first; + if (isa<DISubprogram>(Scope)) + AbstractScopesList.push_back(&I->second); + return &I->second; +} + +/// constructScopeNest +void LexicalScopes::constructScopeNest(LexicalScope *Scope) { + assert(Scope && "Unable to calculate scope dominance graph!"); + SmallVector<LexicalScope *, 4> WorkStack; + WorkStack.push_back(Scope); + unsigned Counter = 0; + while (!WorkStack.empty()) { + LexicalScope *WS = WorkStack.back(); + const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren(); + bool visitedChildren = false; + for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(), + SE = Children.end(); + SI != SE; ++SI) { + LexicalScope *ChildScope = *SI; + if (!ChildScope->getDFSOut()) { + WorkStack.push_back(ChildScope); + visitedChildren = true; + ChildScope->setDFSIn(++Counter); + break; + } + } + if (!visitedChildren) { + WorkStack.pop_back(); + WS->setDFSOut(++Counter); + } + } +} + +/// assignInstructionRanges - Find ranges of instructions covered by each +/// lexical scope. +void LexicalScopes::assignInstructionRanges( + SmallVectorImpl<InsnRange> &MIRanges, + DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) { + + LexicalScope *PrevLexicalScope = nullptr; + for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(), + RE = MIRanges.end(); + RI != RE; ++RI) { + const InsnRange &R = *RI; + LexicalScope *S = MI2ScopeMap.lookup(R.first); + assert(S && "Lost LexicalScope for a machine instruction!"); + if (PrevLexicalScope && !PrevLexicalScope->dominates(S)) + PrevLexicalScope->closeInsnRange(S); + S->openInsnRange(R.first); + S->extendInsnRange(R.second); + PrevLexicalScope = S; + } + + if (PrevLexicalScope) + PrevLexicalScope->closeInsnRange(); +} + +/// getMachineBasicBlocks - Populate given set using machine basic blocks which +/// have machine instructions that belong to lexical scope identified by +/// DebugLoc. +void LexicalScopes::getMachineBasicBlocks( + const DILocation *DL, SmallPtrSetImpl<const MachineBasicBlock *> &MBBs) { + MBBs.clear(); + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return; + + if (Scope == CurrentFnLexicalScope) { + for (const auto &MBB : *MF) + MBBs.insert(&MBB); + return; + } + + SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(), + E = InsnRanges.end(); + I != E; ++I) { + InsnRange &R = *I; + MBBs.insert(R.first->getParent()); + } +} + +/// dominates - Return true if DebugLoc's lexical scope dominates at least one +/// machine instruction's lexical scope in a given machine basic block. +bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) { + LexicalScope *Scope = getOrCreateLexicalScope(DL); + if (!Scope) + return false; + + // Current function scope covers all basic blocks in the function. + if (Scope == CurrentFnLexicalScope && MBB->getParent() == MF) + return true; + + bool Result = false; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) { + if (const DILocation *IDL = I->getDebugLoc()) + if (LexicalScope *IScope = getOrCreateLexicalScope(IDL)) + if (Scope->dominates(IScope)) + return true; + } + return Result; +} + +/// dump - Print data structures. 
+void LexicalScope::dump(unsigned Indent) const { +#ifndef NDEBUG + raw_ostream &err = dbgs(); + err.indent(Indent); + err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; + const MDNode *N = Desc; + err.indent(Indent); + N->dump(); + if (AbstractScope) + err << std::string(Indent, ' ') << "Abstract Scope\n"; + + if (!Children.empty()) + err << std::string(Indent + 2, ' ') << "Children ...\n"; + for (unsigned i = 0, e = Children.size(); i != e; ++i) + if (Children[i] != this) + Children[i]->dump(Indent + 2); +#endif +} diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp new file mode 100644 index 0000000..98d30b9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -0,0 +1,405 @@ +//===------ LiveDebugValues.cpp - Tracking Debug Value MIs ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// This pass implements a data flow analysis that propagates debug location +/// information by inserting additional DBG_VALUE instructions into the machine +/// instruction stream. The pass internally builds debug location liveness +/// ranges to determine the points where additional DBG_VALUEs need to be +/// inserted. +/// +/// This is a separate pass from DbgValueHistoryCalculator to facilitate +/// testing and improve modularity. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <deque> +#include <list> + +using namespace llvm; + +#define DEBUG_TYPE "live-debug-values" + +STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted"); + +namespace { + +class LiveDebugValues : public MachineFunctionPass { + +private: + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + + typedef std::pair<const DILocalVariable *, const DILocation *> + InlinedVariable; + + /// A potentially inlined instance of a variable. + struct DebugVariable { + const DILocalVariable *Var; + const DILocation *InlinedAt; + + DebugVariable(const DILocalVariable *_var, const DILocation *_inlinedAt) + : Var(_var), InlinedAt(_inlinedAt) {} + + bool operator==(const DebugVariable &DV) const { + return (Var == DV.Var) && (InlinedAt == DV.InlinedAt); + } + }; + + /// Member variables and functions for Range Extension across basic blocks. + struct VarLoc { + DebugVariable Var; + const MachineInstr *MI; // MachineInstr should be a DBG_VALUE instr. + + VarLoc(DebugVariable _var, const MachineInstr *_mi) : Var(_var), MI(_mi) {} + + bool operator==(const VarLoc &V) const; + }; + + typedef std::list<VarLoc> VarLocList; + typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB; + + bool OLChanged; // OutgoingLocs got changed for this bb. + bool MBBJoined; // The MBB was joined. 
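+  // Transfer functions of the analysis (descriptive summary, not from the
+  // original source): transferDebugValue opens a location range at each
+  // DBG_VALUE, transferRegisterDef closes ranges whose register is clobbered
+  // by a def, and transferTerminatorInst flushes the still-open ranges into
+  // OutLocs at the end of the basic block.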
+ + void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges); + void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges); + void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs); + void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs); + + void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs); + + bool ExtendRanges(MachineFunction &MF); + +public: + static char ID; + + /// Default construct and initialize the pass. + LiveDebugValues(); + + /// Tell the pass manager which passes we depend on and what + /// information we preserve. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// Print to ostream with a message. + void printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const; + + /// Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // namespace + +//===----------------------------------------------------------------------===// +// Implementation +//===----------------------------------------------------------------------===// + +char LiveDebugValues::ID = 0; +char &llvm::LiveDebugValuesID = LiveDebugValues::ID; +INITIALIZE_PASS(LiveDebugValues, "livedebugvalues", "Live DEBUG_VALUE analysis", + false, false) + +/// Default construct and initialize the pass. +LiveDebugValues::LiveDebugValues() : MachineFunctionPass(ID) { + initializeLiveDebugValuesPass(*PassRegistry::getPassRegistry()); +} + +/// Tell the pass manager which passes we depend on and what information we +/// preserve. +void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const { + MachineFunctionPass::getAnalysisUsage(AU); +} + +// \brief If @MI is a DBG_VALUE with debug value described by a defined +// register, returns the number of this register. In the other case, returns 0. +static unsigned isDescribedByReg(const MachineInstr &MI) { + assert(MI.isDebugValue()); + assert(MI.getNumOperands() == 4); + // If location of variable is described using a register (directly or + // indirecltly), this register is always a first operand. + return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0; +} + +// \brief This function takes two DBG_VALUE instructions and returns true +// if their offsets are equal; otherwise returns false. +static bool areOffsetsEqual(const MachineInstr &MI1, const MachineInstr &MI2) { + assert(MI1.isDebugValue()); + assert(MI1.getNumOperands() == 4); + + assert(MI2.isDebugValue()); + assert(MI2.getNumOperands() == 4); + + if (!MI1.isIndirectDebugValue() && !MI2.isIndirectDebugValue()) + return true; + + // Check if both MIs are indirect and they are equal. 
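+  // (Illustrative note, not from the original source: for an indirect
+  // DBG_VALUE the operands are <register, offset, variable, expression>, so
+  // the offset compared below is the immediate in operand 1.)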
+ if (MI1.isIndirectDebugValue() && MI2.isIndirectDebugValue()) + return MI1.getOperand(1).getImm() == MI2.getOperand(1).getImm(); + + return false; +} + +//===----------------------------------------------------------------------===// +// Debug Range Extension Implementation +//===----------------------------------------------------------------------===// + +void LiveDebugValues::printVarLocInMBB(const VarLocInMBB &V, const char *msg, + raw_ostream &Out) const { + Out << "Printing " << msg << ":\n"; + for (const auto &L : V) { + Out << "MBB: " << L.first->getName() << ":\n"; + for (const auto &VLL : L.second) { + Out << " Var: " << VLL.Var.Var->getName(); + Out << " MI: "; + (*VLL.MI).dump(); + Out << "\n"; + } + } + Out << "\n"; +} + +bool LiveDebugValues::VarLoc::operator==(const VarLoc &V) const { + return (Var == V.Var) && (isDescribedByReg(*MI) == isDescribedByReg(*V.MI)) && + (areOffsetsEqual(*MI, *V.MI)); +} + +/// End all previous ranges related to @MI and start a new range from @MI +/// if it is a DBG_VALUE instr. +void LiveDebugValues::transferDebugValue(MachineInstr &MI, + VarLocList &OpenRanges) { + if (!MI.isDebugValue()) + return; + const DILocalVariable *RawVar = MI.getDebugVariable(); + assert(RawVar->isValidLocationForIntrinsic(MI.getDebugLoc()) && + "Expected inlined-at fields to agree"); + DebugVariable Var(RawVar, MI.getDebugLoc()->getInlinedAt()); + + // End all previous ranges of Var. + OpenRanges.erase( + std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { return (Var == V.Var); }), + OpenRanges.end()); + + // Add Var to OpenRanges from this DBG_VALUE. + // TODO: Currently handles DBG_VALUE which has only reg as location. + if (isDescribedByReg(MI)) { + VarLoc V(Var, &MI); + OpenRanges.push_back(std::move(V)); + } +} + +/// A definition of a register may mark the end of a range. +void LiveDebugValues::transferRegisterDef(MachineInstr &MI, + VarLocList &OpenRanges) { + for (const MachineOperand &MO : MI.operands()) { + if (!(MO.isReg() && MO.isDef() && MO.getReg() && + TRI->isPhysicalRegister(MO.getReg()))) + continue; + // Remove ranges of all aliased registers. + for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI) + OpenRanges.erase(std::remove_if(OpenRanges.begin(), OpenRanges.end(), + [&](const VarLoc &V) { + return (*RAI == + isDescribedByReg(*V.MI)); + }), + OpenRanges.end()); + } +} + +/// Terminate all open ranges at the end of the current basic block. +void LiveDebugValues::transferTerminatorInst(MachineInstr &MI, + VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + const MachineBasicBlock *CurMBB = MI.getParent(); + if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back()))) + return; + + if (OpenRanges.empty()) + return; + + if (OutLocs.find(CurMBB) == OutLocs.end()) { + // Create space for new Outgoing locs entries. + VarLocList VLL; + OutLocs.insert(std::make_pair(CurMBB, std::move(VLL))); + } + auto OL = OutLocs.find(CurMBB); + assert(OL != OutLocs.end()); + VarLocList &VLL = OL->second; + + for (auto OR : OpenRanges) { + // Copy OpenRanges to OutLocs, if not already present. + assert(OR.MI->isDebugValue()); + DEBUG(dbgs() << "Add to OutLocs: "; OR.MI->dump();); + if (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { return (OR == V); }) == VLL.end()) { + VLL.push_back(std::move(OR)); + OLChanged = true; + } + } + OpenRanges.clear(); +} + +/// This routine creates OpenRanges and OutLocs. 
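+/// It is simply the per-instruction composition of the three steps above:
+/// open ranges at DBG_VALUEs, close ranges at clobbering defs, and record the
+/// surviving ranges in OutLocs at block terminators.  (Summary added for
+/// clarity; not from the original comment.)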
+void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges, + VarLocInMBB &OutLocs) { + transferDebugValue(MI, OpenRanges); + transferRegisterDef(MI, OpenRanges); + transferTerminatorInst(MI, OpenRanges, OutLocs); +} + +/// This routine joins the analysis results of all incoming edges in @MBB by +/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same +/// source variable in all the predecessors of @MBB reside in the same location. +void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, + VarLocInMBB &InLocs) { + DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n"); + + MBBJoined = false; + + VarLocList InLocsT; // Temporary incoming locations. + + // For all predecessors of this MBB, find the set of VarLocs that can be + // joined. + for (auto p : MBB.predecessors()) { + auto OL = OutLocs.find(p); + // Join is null in case of empty OutLocs from any of the pred. + if (OL == OutLocs.end()) + return; + + // Just copy over the Out locs to incoming locs for the first predecessor. + if (p == *MBB.pred_begin()) { + InLocsT = OL->second; + continue; + } + + // Join with this predecessor. + VarLocList &VLL = OL->second; + InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(), + [&](VarLoc &ILT) { + return (std::find_if(VLL.begin(), VLL.end(), + [&](const VarLoc &V) { + return (ILT == V); + }) == VLL.end()); + }), + InLocsT.end()); + } + + if (InLocsT.empty()) + return; + + if (InLocs.find(&MBB) == InLocs.end()) { + // Create space for new Incoming locs entries. + VarLocList VLL; + InLocs.insert(std::make_pair(&MBB, std::move(VLL))); + } + auto IL = InLocs.find(&MBB); + assert(IL != InLocs.end()); + VarLocList &ILL = IL->second; + + // Insert DBG_VALUE instructions, if not already inserted. + for (auto ILT : InLocsT) { + if (std::find_if(ILL.begin(), ILL.end(), [&](const VarLoc &I) { + return (ILT == I); + }) == ILL.end()) { + // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a + // new range is started for the var from the mbb's beginning by inserting + // a new DBG_VALUE. transfer() will end this range however appropriate. + const MachineInstr *DMI = ILT.MI; + MachineInstr *MI = + BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(), + DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(), 0, + DMI->getDebugVariable(), DMI->getDebugExpression()); + if (DMI->isIndirectDebugValue()) + MI->getOperand(1).setImm(DMI->getOperand(1).getImm()); + DEBUG(dbgs() << "Inserted: "; MI->dump();); + ++NumInserted; + MBBJoined = true; // rerun transfer(). + + VarLoc V(ILT.Var, MI); + ILL.push_back(std::move(V)); + } + } +} + +/// Calculate the liveness information for the given machine function and +/// extend ranges across basic blocks. +bool LiveDebugValues::ExtendRanges(MachineFunction &MF) { + + DEBUG(dbgs() << "\nDebug Range Extension\n"); + + bool Changed = false; + OLChanged = MBBJoined = false; + + VarLocList OpenRanges; // Ranges that are open until end of bb. + VarLocInMBB OutLocs; // Ranges that exist beyond bb. + VarLocInMBB InLocs; // Ranges that are incoming after joining. + + std::deque<MachineBasicBlock *> BBWorklist; + + // Initialize every mbb with OutLocs. + for (auto &MBB : MF) + for (auto &MI : MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs())); + + // Construct a worklist of MBBs. 
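+  // (Illustrative note, not from the original source: the worklist iteration
+  // below is a standard forward data-flow fixed point.  A block is re-queued
+  // whenever one of its predecessors' OutLocs grows, and because locations
+  // are only ever added to OutLocs, the iteration terminates once no block's
+  // outgoing set changes.)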
+ for (auto &MBB : MF) + BBWorklist.push_back(&MBB); + + // Perform join() and transfer() using the worklist until the ranges converge + // Ranges have converged when the worklist is empty. + while (!BBWorklist.empty()) { + MachineBasicBlock *MBB = BBWorklist.front(); + BBWorklist.pop_front(); + + join(*MBB, OutLocs, InLocs); + + if (MBBJoined) { + Changed = true; + for (auto &MI : *MBB) + transfer(MI, OpenRanges, OutLocs); + DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs())); + + if (OLChanged) { + OLChanged = false; + for (auto s : MBB->successors()) + if (std::find(BBWorklist.begin(), BBWorklist.end(), s) == + BBWorklist.end()) // add if not already present. + BBWorklist.push_back(s); + } + } + } + DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs())); + DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs())); + return Changed; +} + +bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + + bool Changed = false; + + Changed |= ExtendRanges(MF); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp new file mode 100644 index 0000000..6dac7db --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -0,0 +1,1040 @@ +//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveDebugVariables analysis. +// +// Remove all DBG_VALUE instructions referencing virtual registers and replace +// them with a data structure tracking where live user variables are kept - in a +// virtual register or in a stack slot. +// +// Allow the data structure to be updated during register allocation when values +// are moved between registers and stack slots. Finally emit new DBG_VALUE +// instructions after register allocation is complete. 
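+// As a rough illustration (not part of the original comment), a DBG_VALUE
+// describing variable "x" in virtual register %vreg5 is deleted here, its
+// live range is tracked through register allocation, and it is finally
+// re-emitted as a DBG_VALUE of whatever physical register or frame index
+// %vreg5 was assigned to.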
+// +//===----------------------------------------------------------------------===// + +#include "LiveDebugVariables.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <memory> + +using namespace llvm; + +#define DEBUG_TYPE "livedebug" + +static cl::opt<bool> +EnableLDV("live-debug-variables", cl::init(true), + cl::desc("Enable the live debug variables pass"), cl::Hidden); + +STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted"); +char LiveDebugVariables::ID = 0; + +INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars", + "Debug Variable Analysis", false, false) + +void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullptr) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); +} + +/// LocMap - Map of where a user value is live, and its location. +typedef IntervalMap<SlotIndex, unsigned, 4> LocMap; + +namespace { +/// UserValueScopes - Keeps track of lexical scopes associated with a +/// user value's source location. +class UserValueScopes { + DebugLoc DL; + LexicalScopes &LS; + SmallPtrSet<const MachineBasicBlock *, 4> LBlocks; + +public: + UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(D), LS(L) {} + + /// dominates - Return true if current scope dominates at least one machine + /// instruction in a given machine basic block. + bool dominates(MachineBasicBlock *MBB) { + if (LBlocks.empty()) + LS.getMachineBasicBlocks(DL, LBlocks); + return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB); + } +}; +} // end anonymous namespace + +/// UserValue - A user value is a part of a debug info user variable. +/// +/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register +/// holds part of a user variable. The part is identified by a byte offset. +/// +/// UserValues are grouped into equivalence classes for easier searching. Two +/// user values are related if they refer to the same variable, or if they are +/// held by the same virtual register. The equivalence class is the transitive +/// closure of that relation. +namespace { +class LDVImpl; +class UserValue { + const MDNode *Variable; ///< The debug info variable we are part of. + const MDNode *Expression; ///< Any complex address expression. + unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. 
+ DebugLoc dl; ///< The debug location for the variable. This is + ///< used by dwarf writer to find lexical scope. + UserValue *leader; ///< Equivalence class leader. + UserValue *next; ///< Next value in equivalence class, or null. + + /// Numbered locations referenced by locmap. + SmallVector<MachineOperand, 4> locations; + + /// Map of slot indices where this value is live. + LocMap locInts; + + /// coalesceLocation - After LocNo was changed, check if it has become + /// identical to another location, and coalesce them. This may cause LocNo or + /// a later location to be erased, but no earlier location will be erased. + void coalesceLocation(unsigned LocNo); + + /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo. + void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, + LiveIntervals &LIS, const TargetInstrInfo &TII); + + /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs + /// is live. Returns true if any changes were made. + bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); + +public: + /// UserValue - Create a new UserValue. + UserValue(const MDNode *var, const MDNode *expr, unsigned o, bool i, + DebugLoc L, LocMap::Allocator &alloc) + : Variable(var), Expression(expr), offset(o), IsIndirect(i), dl(L), + leader(this), next(nullptr), locInts(alloc) {} + + /// getLeader - Get the leader of this value's equivalence class. + UserValue *getLeader() { + UserValue *l = leader; + while (l != l->leader) + l = l->leader; + return leader = l; + } + + /// getNext - Return the next UserValue in the equivalence class. + UserValue *getNext() const { return next; } + + /// match - Does this UserValue match the parameters? + bool match(const MDNode *Var, const MDNode *Expr, const DILocation *IA, + unsigned Offset, bool indirect) const { + return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA && + Offset == offset && indirect == IsIndirect; + } + + /// merge - Merge equivalence classes. + static UserValue *merge(UserValue *L1, UserValue *L2) { + L2 = L2->getLeader(); + if (!L1) + return L2; + L1 = L1->getLeader(); + if (L1 == L2) + return L1; + // Splice L2 before L1's members. + UserValue *End = L2; + while (End->next) + End->leader = L1, End = End->next; + End->leader = L1; + End->next = L1->next; + L1->next = L2; + return L1; + } + + /// getLocationNo - Return the location number that matches Loc. + unsigned getLocationNo(const MachineOperand &LocMO) { + if (LocMO.isReg()) { + if (LocMO.getReg() == 0) + return ~0u; + // For register locations we dont care about use/def and other flags. + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + locations[i].getReg() == LocMO.getReg() && + locations[i].getSubReg() == LocMO.getSubReg()) + return i; + } else + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (LocMO.isIdenticalTo(locations[i])) + return i; + locations.push_back(LocMO); + // We are storing a MachineOperand outside a MachineInstr. + locations.back().clearParent(); + // Don't store def operands. + if (locations.back().isReg()) + locations.back().setIsUse(); + return locations.size() - 1; + } + + /// mapVirtRegs - Ensure that all virtual register locations are mapped. + void mapVirtRegs(LDVImpl *LDV); + + /// addDef - Add a definition point to this value. + void addDef(SlotIndex Idx, const MachineOperand &LocMO) { + // Add a singular (Idx,Idx) -> Loc mapping. 
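+    // (Illustrative note, not from the original source: the entry inserted
+    // below initially covers just the defining slot index; extendDef() is
+    // what later grows the range down the dominator tree.)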
+ LocMap::iterator I = locInts.find(Idx); + if (!I.valid() || I.start() != Idx) + I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO)); + else + // A later DBG_VALUE at the same SlotIndex overrides the old location. + I.setValue(getLocationNo(LocMO)); + } + + /// extendDef - Extend the current definition as far as possible down the + /// dominator tree. Stop when meeting an existing def or when leaving the live + /// range of VNI. + /// End points where VNI is no longer live are added to Kills. + /// @param Idx Starting point for the definition. + /// @param LocNo Location number to propagate. + /// @param LR Restrict liveness to where LR has the value VNI. May be null. + /// @param VNI When LR is not null, this is the value to restrict to. + /// @param Kills Append end points of VNI's live range to Kills. + /// @param LIS Live intervals analysis. + /// @param MDT Dominator tree. + void extendDef(SlotIndex Idx, unsigned LocNo, + LiveRange *LR, const VNInfo *VNI, + SmallVectorImpl<SlotIndex> *Kills, + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS); + + /// addDefsFromCopies - The value in LI/LocNo may be copies to other + /// registers. Determine if any of the copies are available at the kill + /// points, and add defs if possible. + /// @param LI Scan for copies of the value in LI->reg. + /// @param LocNo Location number of LI->reg. + /// @param Kills Points where the range of LocNo could be extended. + /// @param NewDefs Append (Idx, LocNo) of inserted defs here. + void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, + LiveIntervals &LIS); + + /// computeIntervals - Compute the live intervals of all locations after + /// collecting all their def points. + void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, + LiveIntervals &LIS, MachineDominatorTree &MDT, + UserValueScopes &UVS); + + /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is + /// live. Returns true if any changes were made. + bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); + + /// rewriteLocations - Rewrite virtual register locations according to the + /// provided virtual register map. + void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); + + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. + void emitDebugValues(VirtRegMap *VRM, + LiveIntervals &LIS, const TargetInstrInfo &TRI); + + /// getDebugLoc - Return DebugLoc of this UserValue. + DebugLoc getDebugLoc() { return dl;} + void print(raw_ostream &, const TargetRegisterInfo *); +}; +} // namespace + +/// LDVImpl - Implementation of the LiveDebugVariables pass. +namespace { +class LDVImpl { + LiveDebugVariables &pass; + LocMap::Allocator allocator; + MachineFunction *MF; + LiveIntervals *LIS; + LexicalScopes LS; + MachineDominatorTree *MDT; + const TargetRegisterInfo *TRI; + + /// Whether emitDebugValues is called. + bool EmitDone; + /// Whether the machine function is modified during the pass. + bool ModifiedMF; + + /// userValues - All allocated UserValue instances. + SmallVector<std::unique_ptr<UserValue>, 8> userValues; + + /// Map virtual register to eq class leader. + typedef DenseMap<unsigned, UserValue*> VRMap; + VRMap virtRegToEqClass; + + /// Map user variable to eq class leader. 
+  typedef DenseMap<const MDNode *, UserValue*> UVMap;
+  UVMap userVarMap;
+
+  /// getUserValue - Find or create a UserValue.
+  UserValue *getUserValue(const MDNode *Var, const MDNode *Expr,
+                          unsigned Offset, bool IsIndirect, DebugLoc DL);
+
+  /// lookupVirtReg - Find the EC leader for VirtReg or null.
+  UserValue *lookupVirtReg(unsigned VirtReg);
+
+  /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+  /// @param MI DBG_VALUE instruction
+  /// @param Idx Last valid SlotIndex before instruction.
+  /// @return True if the DBG_VALUE instruction should be deleted.
+  bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+  /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+  /// a UserValue def for each instruction.
+  /// @param mf MachineFunction to be scanned.
+  /// @return True if any debug values were found.
+  bool collectDebugValues(MachineFunction &mf);
+
+  /// computeIntervals - Compute the live intervals of all user values after
+  /// collecting all their def points.
+  void computeIntervals();
+
+public:
+  LDVImpl(LiveDebugVariables *ps)
+      : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {}
+  bool runOnMachineFunction(MachineFunction &mf);
+
+  /// clear - Release all memory.
+  void clear() {
+    MF = nullptr;
+    userValues.clear();
+    virtRegToEqClass.clear();
+    userVarMap.clear();
+    // Make sure we call emitDebugValues if the machine function was modified.
+    assert((!ModifiedMF || EmitDone) &&
+           "Dbg values are not emitted in LDV");
+    EmitDone = false;
+    ModifiedMF = false;
+    LS.reset();
+  }
+
+  /// mapVirtReg - Map virtual register to an equivalence class.
+  void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+  /// splitRegister - Replace all references to OldReg with NewRegs.
+  void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
+
+  /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
+  void emitDebugValues(VirtRegMap *VRM);
+
+  void print(raw_ostream&);
+};
+} // namespace
+
+static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
+                          const LLVMContext &Ctx) {
+  if (!DL)
+    return;
+
+  auto *Scope = cast<DIScope>(DL.getScope());
+  // Omit the directory, because it's likely to be long and uninteresting.
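+  // The printed form is e.g. "foo.c:42:7 @[ bar.c:10 ]" for an inlined
+  // location (file names and line numbers here are purely illustrative).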
+ CommentOS << Scope->getFilename(); + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + + DebugLoc InlinedAtDL = DL.getInlinedAt(); + if (!InlinedAtDL) + return; + + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, CommentOS, Ctx); + CommentOS << " ]"; +} + +static void printExtendedName(raw_ostream &OS, const DILocalVariable *V, + const DILocation *DL) { + const LLVMContext &Ctx = V->getContext(); + StringRef Res = V->getName(); + if (!Res.empty()) + OS << Res << "," << V->getLine(); + if (auto *InlinedAt = DL->getInlinedAt()) { + if (DebugLoc InlinedAtDL = InlinedAt) { + OS << " @["; + printDebugLoc(InlinedAtDL, OS, Ctx); + OS << "]"; + } + } +} + +void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { + auto *DV = cast<DILocalVariable>(Variable); + OS << "!\""; + printExtendedName(OS, DV, dl); + + OS << "\"\t"; + if (offset) + OS << '+' << offset; + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) { + OS << " [" << I.start() << ';' << I.stop() << "):"; + if (I.value() == ~0u) + OS << "undef"; + else + OS << I.value(); + } + for (unsigned i = 0, e = locations.size(); i != e; ++i) { + OS << " Loc" << i << '='; + locations[i].print(OS, TRI); + } + OS << '\n'; +} + +void LDVImpl::print(raw_ostream &OS) { + OS << "********** DEBUG VARIABLES **********\n"; + for (unsigned i = 0, e = userValues.size(); i != e; ++i) + userValues[i]->print(OS, TRI); +} + +void UserValue::coalesceLocation(unsigned LocNo) { + unsigned KeepLoc = 0; + for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) { + if (KeepLoc == LocNo) + continue; + if (locations[KeepLoc].isIdenticalTo(locations[LocNo])) + break; + } + // No matches. + if (KeepLoc == locations.size()) + return; + + // Keep the smaller location, erase the larger one. + unsigned EraseLoc = LocNo; + if (KeepLoc > EraseLoc) + std::swap(KeepLoc, EraseLoc); + locations.erase(locations.begin() + EraseLoc); + + // Rewrite values. + for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) { + unsigned v = I.value(); + if (v == EraseLoc) + I.setValue(KeepLoc); // Coalesce when possible. + else if (v > EraseLoc) + I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values. 
+  }
+}
+
+void UserValue::mapVirtRegs(LDVImpl *LDV) {
+  for (unsigned i = 0, e = locations.size(); i != e; ++i)
+    if (locations[i].isReg() &&
+        TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+      LDV->mapVirtReg(locations[i].getReg(), this);
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, const MDNode *Expr,
+                                 unsigned Offset, bool IsIndirect,
+                                 DebugLoc DL) {
+  UserValue *&Leader = userVarMap[Var];
+  if (Leader) {
+    UserValue *UV = Leader->getLeader();
+    Leader = UV;
+    for (; UV; UV = UV->getNext())
+      if (UV->match(Var, Expr, DL->getInlinedAt(), Offset, IsIndirect))
+        return UV;
+  }
+
+  userValues.push_back(
+      make_unique<UserValue>(Var, Expr, Offset, IsIndirect, DL, allocator));
+  UserValue *UV = userValues.back().get();
+  Leader = UserValue::merge(Leader, UV);
+  return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+  assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+  UserValue *&Leader = virtRegToEqClass[VirtReg];
+  Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+  if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+    return UV->getLeader();
+  return nullptr;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+  // DBG_VALUE loc, offset, variable
+  if (MI->getNumOperands() != 4 ||
+      !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
+      !MI->getOperand(2).isMetadata()) {
+    DEBUG(dbgs() << "Can't handle " << *MI);
+    return false;
+  }
+
+  // Get or create the UserValue for (variable,offset).
+  bool IsIndirect = MI->isIndirectDebugValue();
+  unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0;
+  const MDNode *Var = MI->getDebugVariable();
+  const MDNode *Expr = MI->getDebugExpression();
+  UserValue *UV =
+      getUserValue(Var, Expr, Offset, IsIndirect, MI->getDebugLoc());
+  UV->addDef(Idx, MI->getOperand(0));
+  return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+  bool Changed = false;
+  for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+       ++MFI) {
+    MachineBasicBlock *MBB = &*MFI;
+    for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+         MBBI != MBBE;) {
+      if (!MBBI->isDebugValue()) {
+        ++MBBI;
+        continue;
+      }
+      // DBG_VALUE has no slot index, use the previous instruction instead.
+      SlotIndex Idx = MBBI == MBB->begin() ?
+        LIS->getMBBStartIdx(MBB) :
+        LIS->getInstructionIndex(std::prev(MBBI)).getRegSlot();
+      // Handle consecutive DBG_VALUE instructions with the same slot index.
+      do {
+        if (handleDebugValue(MBBI, Idx)) {
+          MBBI = MBB->erase(MBBI);
+          Changed = true;
+        } else
+          ++MBBI;
+      } while (MBBI != MBBE && MBBI->isDebugValue());
+    }
+  }
+  return Changed;
+}
+
+/// We only propagate DBG_VALUES locally here. LiveDebugValues performs a
+/// data-flow analysis to propagate them beyond basic block boundaries.
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
+                          const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
+                          LiveIntervals &LIS, MachineDominatorTree &MDT,
+                          UserValueScopes &UVS) {
+  SlotIndex Start = Idx;
+  MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+  SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+  LocMap::iterator I = locInts.find(Start);
+
+  // Limit to VNI's live range.
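+  // For instance, if the segment of VNI containing Idx ends at some index K
+  // before the block end, propagation stops at K and, unless an existing def
+  // cuts the range off even earlier, K is recorded in Kills so that
+  // addDefsFromCopies() can try to follow the value through copies from there.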
+ bool ToEnd = true; + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { + if (Kills) + Kills->push_back(Start); + return; + } + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; + } + + // There could already be a short def at Start. + if (I.valid() && I.start() <= Start) { + // Stop when meeting a different location or an already extended interval. + Start = Start.getNextSlot(); + if (I.value() != LocNo || I.stop() != Start) + return; + // This is a one-slot placeholder. Just skip it. + ++I; + } + + // Limited by the next def. + if (I.valid() && I.start() < Stop) + Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. + else if (!ToEnd && Kills) + Kills->push_back(Stop); + + if (Start < Stop) + I.insert(Start, Stop, LocNo); +} + +void +UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl<SlotIndex> &Kills, + SmallVectorImpl<std::pair<SlotIndex, unsigned> > &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { + if (Kills.empty()) + return; + // Don't track copies from physregs, there are too many uses. + if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + return; + + // Collect all the (vreg, valno) pairs that are copies of LI. + SmallVector<std::pair<LiveInterval*, const VNInfo*>, 8> CopyValues; + for (MachineOperand &MO : MRI.use_nodbg_operands(LI->reg)) { + MachineInstr *MI = MO.getParent(); + // Copies of the full value. + if (MO.getSubReg() || !MI->isCopy()) + continue; + unsigned DstReg = MI->getOperand(0).getReg(); + + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved register, + // or it could be spilled. + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + continue; + + // Is LocNo extended to reach this copy? If not, another def may be blocking + // it, or we are looking at a wrong value of LI. + SlotIndex Idx = LIS.getInstructionIndex(MI); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); + if (!I.valid() || I.value() != LocNo) + continue; + + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); + CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + } + + if (CopyValues.empty()) + return; + + DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + + // Try to add defs of the copied values for each kill point. 
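+  // That is, for each kill index we look for a copy destination that still
+  // carries the copied value there and give it a one-slot def of its own
+  // location number; computeIntervals() picks these up and extends them in
+  // turn.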
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + SlotIndex Idx = Kills[i]; + for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { + LiveInterval *DstLI = CopyValues[j].first; + const VNInfo *DstVNI = CopyValues[j].second; + if (DstLI->getVNInfoAt(Idx) != DstVNI) + continue; + // Check that there isn't already a def at Idx + LocMap::iterator I = locInts.find(Idx); + if (I.valid() && I.start() <= Idx) + continue; + DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); + MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); + assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); + unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); + I.insert(Idx, Idx.getNextSlot(), LocNo); + NewDefs.push_back(std::make_pair(Idx, LocNo)); + break; + } + } +} + +void +UserValue::computeIntervals(MachineRegisterInfo &MRI, + const TargetRegisterInfo &TRI, + LiveIntervals &LIS, + MachineDominatorTree &MDT, + UserValueScopes &UVS) { + SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs; + + // Collect all defs to be extended (Skipping undefs). + for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) + if (I.value() != ~0u) + Defs.push_back(std::make_pair(I.start(), I.value())); + + // Extend all defs, and possibly add new ones along the way. + for (unsigned i = 0; i != Defs.size(); ++i) { + SlotIndex Idx = Defs[i].first; + unsigned LocNo = Defs[i].second; + const MachineOperand &Loc = locations[LocNo]; + + if (!Loc.isReg()) { + extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS); + continue; + } + + // Register locations are constrained to where the register value is live. + if (TargetRegisterInfo::isVirtualRegister(Loc.getReg())) { + LiveInterval *LI = nullptr; + const VNInfo *VNI = nullptr; + if (LIS.hasInterval(Loc.getReg())) { + LI = &LIS.getInterval(Loc.getReg()); + VNI = LI->getVNInfoAt(Idx); + } + SmallVector<SlotIndex, 16> Kills; + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS); + if (LI) + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); + continue; + } + + // For physregs, use the live range of the first regunit as a guide. + unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); + LiveRange *LR = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LR->getVNInfoAt(Idx); + // Don't track copies from physregs, it is too expensive. + extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS); + } + + // Finally, erase all the undefs. 
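+  // (A location number of ~0u marks a DBG_VALUE whose location is unknown;
+  // such entries only served to terminate earlier ranges during the extension
+  // above.)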
+ for (LocMap::iterator I = locInts.begin(); I.valid();) + if (I.value() == ~0u) + I.erase(); + else + ++I; +} + +void LDVImpl::computeIntervals() { + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + UserValueScopes UVS(userValues[i]->getDebugLoc(), LS); + userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS); + userValues[i]->mapVirtRegs(this); + } +} + +bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + clear(); + MF = &mf; + LIS = &pass.getAnalysis<LiveIntervals>(); + MDT = &pass.getAnalysis<MachineDominatorTree>(); + TRI = mf.getSubtarget().getRegisterInfo(); + LS.initialize(mf); + DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " + << mf.getName() << " **********\n"); + + bool Changed = collectDebugValues(mf); + computeIntervals(); + DEBUG(print(dbgs())); + ModifiedMF = Changed; + return Changed; +} + +static void removeDebugValues(MachineFunction &mf) { + for (MachineBasicBlock &MBB : mf) { + for (auto MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE; ) { + if (!MBBI->isDebugValue()) { + ++MBBI; + continue; + } + MBBI = MBB.erase(MBBI); + } + } +} + +bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { + if (!EnableLDV) + return false; + if (!mf.getFunction()->getSubprogram()) { + removeDebugValues(mf); + return false; + } + if (!pImpl) + pImpl = new LDVImpl(this); + return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); +} + +void LiveDebugVariables::releaseMemory() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->clear(); +} + +LiveDebugVariables::~LiveDebugVariables() { + if (pImpl) + delete static_cast<LDVImpl*>(pImpl); +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +bool +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals& LIS) { + DEBUG({ + dbgs() << "Splitting Loc" << OldLocNo << '\t'; + print(dbgs(), nullptr); + }); + bool DidChange = false; + LocMap::iterator LocMapI; + LocMapI.setMap(locInts); + for (unsigned i = 0; i != NewRegs.size(); ++i) { + LiveInterval *LI = &LIS.getInterval(NewRegs[i]); + if (LI->empty()) + continue; + + // Don't allocate the new LocNo until it is needed. + unsigned NewLocNo = ~0u; + + // Iterate over the overlaps between locInts and LI. + LocMapI.find(LI->beginIndex()); + if (!LocMapI.valid()) + continue; + LiveInterval::iterator LII = LI->advanceTo(LI->begin(), LocMapI.start()); + LiveInterval::iterator LIE = LI->end(); + while (LocMapI.valid() && LII != LIE) { + // At this point, we know that LocMapI.stop() > LII->start. + LII = LI->advanceTo(LII, LocMapI.start()); + if (LII == LIE) + break; + + // Now LII->end > LocMapI.start(). Do we have an overlap? + if (LocMapI.value() == OldLocNo && LII->start < LocMapI.stop()) { + // Overlapping correct location. Allocate NewLocNo now. + if (NewLocNo == ~0u) { + MachineOperand MO = MachineOperand::CreateReg(LI->reg, false); + MO.setSubReg(locations[OldLocNo].getSubReg()); + NewLocNo = getLocationNo(MO); + DidChange = true; + } + + SlotIndex LStart = LocMapI.start(); + SlotIndex LStop = LocMapI.stop(); + + // Trim LocMapI down to the LII overlap. + if (LStart < LII->start) + LocMapI.setStartUnchecked(LII->start); + if (LStop > LII->end) + LocMapI.setStopUnchecked(LII->end); + + // Change the value in the overlap. This may trigger coalescing. + LocMapI.setValue(NewLocNo); + + // Re-insert any removed OldLocNo ranges. 
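+        // Schematically, when the original interval sticks out on both sides,
+        // the result is [LStart;LII->start):Old, [LII->start;LII->end):New,
+        // [LII->end;LStop):Old; a side that does not stick out simply has no
+        // Old piece re-inserted.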
+ if (LStart < LocMapI.start()) { + LocMapI.insert(LStart, LocMapI.start(), OldLocNo); + ++LocMapI; + assert(LocMapI.valid() && "Unexpected coalescing"); + } + if (LStop > LocMapI.stop()) { + ++LocMapI; + LocMapI.insert(LII->end, LStop, OldLocNo); + --LocMapI; + } + } + + // Advance to the next overlap. + if (LII->end < LocMapI.stop()) { + if (++LII == LIE) + break; + LocMapI.advanceTo(LII->start); + } else { + ++LocMapI; + if (!LocMapI.valid()) + break; + LII = LI->advanceTo(LII, LocMapI.start()); + } + } + } + + // Finally, remove any remaining OldLocNo intervals and OldLocNo itself. + locations.erase(locations.begin() + OldLocNo); + LocMapI.goToBegin(); + while (LocMapI.valid()) { + unsigned v = LocMapI.value(); + if (v == OldLocNo) { + DEBUG(dbgs() << "Erasing [" << LocMapI.start() << ';' + << LocMapI.stop() << ")\n"); + LocMapI.erase(); + } else { + if (v > OldLocNo) + LocMapI.setValueUnchecked(v-1); + ++LocMapI; + } + } + + DEBUG({dbgs() << "Split result: \t"; print(dbgs(), nullptr);}); + return DidChange; +} + +bool +UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS) { + bool DidChange = false; + // Split locations referring to OldReg. Iterate backwards so splitLocation can + // safely erase unused locations. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + const MachineOperand *Loc = &locations[LocNo]; + if (!Loc->isReg() || Loc->getReg() != OldReg) + continue; + DidChange |= splitLocation(LocNo, NewRegs, LIS); + } + return DidChange; +} + +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { + bool DidChange = false; + for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); + + if (!DidChange) + return; + + // Map all of the new virtual registers. + UserValue *UV = lookupVirtReg(OldReg); + for (unsigned i = 0; i != NewRegs.size(); ++i) + mapVirtReg(NewRegs[i], UV); +} + +void LiveDebugVariables:: +splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); +} + +void +UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { + // Iterate over locations in reverse makes it easier to handle coalescing. + for (unsigned i = locations.size(); i ; --i) { + unsigned LocNo = i-1; + MachineOperand &Loc = locations[LocNo]; + // Only virtual registers are rewritten. + if (!Loc.isReg() || !Loc.getReg() || + !TargetRegisterInfo::isVirtualRegister(Loc.getReg())) + continue; + unsigned VirtReg = Loc.getReg(); + if (VRM.isAssignedReg(VirtReg) && + TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) { + // This can create a %noreg operand in rare cases when the sub-register + // index is no longer available. That means the user value is in a + // non-existent sub-register, and %noreg is exactly what we want. + Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { + // FIXME: Translate SubIdx to a stackslot offset. + Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); + } else { + Loc.setReg(0); + Loc.setSubReg(0); + } + coalesceLocation(LocNo); + } +} + +/// findInsertLocation - Find an iterator for inserting a DBG_VALUE +/// instruction. 
+static MachineBasicBlock::iterator +findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx, + LiveIntervals &LIS) { + SlotIndex Start = LIS.getMBBStartIdx(MBB); + Idx = Idx.getBaseIndex(); + + // Try to find an insert location by going backwards from Idx. + MachineInstr *MI; + while (!(MI = LIS.getInstructionFromIndex(Idx))) { + // We've reached the beginning of MBB. + if (Idx == Start) { + MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin()); + return I; + } + Idx = Idx.getPrevIndex(); + } + + // Don't insert anything after the first terminator, though. + return MI->isTerminator() ? MBB->getFirstTerminator() : + std::next(MachineBasicBlock::iterator(MI)); +} + +void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, + unsigned LocNo, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); + MachineOperand &Loc = locations[LocNo]; + ++NumInsertedDebugValues; + + assert(cast<DILocalVariable>(Variable) + ->isValidLocationForIntrinsic(getDebugLoc()) && + "Expected inlined-at fields to agree"); + if (Loc.isReg()) + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, Variable, Expression); + else + BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc) + .addImm(offset) + .addMetadata(Variable) + .addMetadata(Expression); +} + +void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); + + for (LocMap::const_iterator I = locInts.begin(); I.valid();) { + SlotIndex Start = I.start(); + SlotIndex Stop = I.stop(); + unsigned LocNo = I.value(); + DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo); + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); + SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB); + + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); + // This interval may span multiple basic blocks. + // Insert a DBG_VALUE into each one. + while(Stop > MBBEnd) { + // Move to the next block. 
+ Start = MBBEnd; + if (++MBB == MFEnd) + break; + MBBEnd = LIS.getMBBEndIdx(&*MBB); + DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd); + insertDebugValue(&*MBB, Start, LocNo, LIS, TII); + } + DEBUG(dbgs() << '\n'); + if (MBB == MFEnd) + break; + + ++I; + } +} + +void LDVImpl::emitDebugValues(VirtRegMap *VRM) { + DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + if (!MF) + return; + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + DEBUG(userValues[i]->print(dbgs(), TRI)); + userValues[i]->rewriteLocations(*VRM, *TRI); + userValues[i]->emitDebugValues(VRM, *LIS, *TII); + } + EmitDone = true; +} + +void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); +} + +bool LiveDebugVariables::doInitialization(Module &M) { + return Pass::doInitialization(M); +} + +#ifndef NDEBUG +void LiveDebugVariables::dump() { + if (pImpl) + static_cast<LDVImpl*>(pImpl)->print(dbgs()); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h new file mode 100644 index 0000000..3d36f4d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -0,0 +1,75 @@ +//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides the interface to the LiveDebugVariables analysis. +// +// The analysis removes DBG_VALUE instructions for virtual registers and tracks +// live user variables in a data structure that can be updated during register +// allocation. +// +// After register allocation new DBG_VALUE instructions are emitted to reflect +// the new locations of user variables. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H +#define LLVM_LIB_CODEGEN_LIVEDEBUGVARIABLES_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/IR/DebugInfo.h" + +namespace llvm { + +class LiveInterval; +class LiveIntervals; +class VirtRegMap; + +class LLVM_LIBRARY_VISIBILITY LiveDebugVariables : public MachineFunctionPass { + void *pImpl; + +public: + static char ID; // Pass identification, replacement for typeid + + LiveDebugVariables(); + ~LiveDebugVariables() override; + + /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx. + /// @param OldReg Old virtual register that is going away. + /// @param NewReg New register holding the user variables. + /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub- + /// register. + void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); + + /// splitRegister - Move any user variables in OldReg to the live ranges in + /// NewRegs where they are live. Mark the values as unavailable where no new + /// register is live. + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); + + /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes + /// that happened during register allocation. + /// @param VRM Rename virtual registers according to map. + void emitDebugValues(VirtRegMap *VRM); + + /// dump - Print data structures to dbgs(). 
+ void dump(); + +private: + + bool runOnMachineFunction(MachineFunction &) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &) const override; + bool doInitialization(Module &) override; + +}; + +} // namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp new file mode 100644 index 0000000..efad36f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -0,0 +1,1468 @@ +//===-- LiveInterval.cpp - Live Interval Representation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveRange and LiveInterval classes. Given some +// numbering of each the machine instructions an interval [i, j) is said to be a +// live range for register v if there is no instruction with number j' >= j +// such that v is live at j' and there is no instruction with number i' < i such +// that v is live at i'. In this implementation ranges can have holes, +// i.e. a range might look like [1,20), [50,65), [1000,1001). Each +// individual segment is represented as an instance of LiveRange::Segment, +// and the whole range is represented as an instance of LiveRange. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveInterval.h" +#include "RegisterCoalescer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> +using namespace llvm; + +namespace { +//===----------------------------------------------------------------------===// +// Implementation of various methods necessary for calculation of live ranges. +// The implementation of the methods abstracts from the concrete type of the +// segment collection. +// +// Implementation of the class follows the Template design pattern. The base +// class contains generic algorithms that call collection-specific methods, +// which are provided in concrete subclasses. In order to avoid virtual calls +// these methods are provided by means of C++ template instantiation. +// The base class calls the methods of the subclass through method impl(), +// which casts 'this' pointer to the type of the subclass. 
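+//
+// A minimal sketch of that pattern (identifiers below are illustrative only,
+// they are not part of this file):
+//
+//   template <typename ImplT> struct Base {
+//     void run() { impl().step(); }          // statically dispatched
+//     ImplT &impl() { return *static_cast<ImplT *>(this); }
+//   };
+//   struct Impl : Base<Impl> { void step() { /* ... */ } };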
+// +//===----------------------------------------------------------------------===// + +template <typename ImplT, typename IteratorT, typename CollectionT> +class CalcLiveRangeUtilBase { +protected: + LiveRange *LR; + +protected: + CalcLiveRangeUtilBase(LiveRange *LR) : LR(LR) {} + +public: + typedef LiveRange::Segment Segment; + typedef IteratorT iterator; + + VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) { + assert(!Def.isDead() && "Cannot define a value at the dead slot"); + + iterator I = impl().find(Def); + if (I == segments().end()) { + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + Segment *S = segmentAt(I); + if (SlotIndex::isSameInstr(Def, S->start)) { + assert(S->valno->def == S->start && "Inconsistent existing value def"); + + // It is possible to have both normal and early-clobber defs of the same + // register on an instruction. It doesn't make a lot of sense, but it is + // possible to specify in inline assembly. + // + // Just convert everything to early-clobber. + Def = std::min(Def, S->start); + if (Def != S->start) + S->start = S->valno->def = Def; + return S->valno; + } + assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def"); + VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator); + segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI)); + return VNI; + } + + VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use) { + if (segments().empty()) + return nullptr; + iterator I = + impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr)); + if (I == segments().begin()) + return nullptr; + --I; + if (I->end <= StartIdx) + return nullptr; + if (I->end < Use) + extendSegmentEndTo(I, Use); + return I->valno; + } + + /// This method is used when we want to extend the segment specified + /// by I to end at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap + /// with. The iterator is not invalidated. + void extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. + iterator MergeTo = std::next(I); + for (; MergeTo != segments().end() && NewEnd >= MergeTo->end; ++MergeTo) + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + + // If NewEnd was in the middle of a segment, make sure to get its endpoint. + S->end = std::max(NewEnd, std::prev(MergeTo)->end); + + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != segments().end() && MergeTo->start <= I->end && + MergeTo->valno == ValNo) { + S->end = MergeTo->end; + ++MergeTo; + } + + // Erase any dead segments. + segments().erase(std::next(I), MergeTo); + } + + /// This method is used when we want to extend the segment specified + /// by I to start at the specified endpoint. To do this, we should + /// merge and eliminate all segments that this will overlap with. + iterator extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != segments().end() && "Not a valid segment!"); + Segment *S = segmentAt(I); + VNInfo *ValNo = I->valno; + + // Search for the first segment that we can't merge with. 
+ iterator MergeTo = I; + do { + if (MergeTo == segments().begin()) { + S->start = NewStart; + segments().erase(MergeTo, I); + return I; + } + assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); + --MergeTo; + } while (NewStart <= MergeTo->start); + + // If we start in the middle of another segment, just delete a range and + // extend that segment. + if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { + segmentAt(MergeTo)->end = S->end; + } else { + // Otherwise, extend the segment right after. + ++MergeTo; + Segment *MergeToSeg = segmentAt(MergeTo); + MergeToSeg->start = NewStart; + MergeToSeg->end = S->end; + } + + segments().erase(std::next(MergeTo), std::next(I)); + return MergeTo; + } + + iterator addSegment(Segment S) { + SlotIndex Start = S.start, End = S.end; + iterator I = impl().findInsertPos(S); + + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (I != segments().begin()) { + iterator B = std::prev(I); + if (S.valno == B->valno) { + if (B->start <= Start && B->end >= Start) { + extendSegmentEndTo(B, End); + return B; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(B->end <= Start && + "Cannot overlap two segments with differing ValID's" + " (did you def the same reg twice in a MachineInstr?)"); + } + } + + // Otherwise, if this segment ends in the middle of, or right next + // to, another segment, merge it into that segment. + if (I != segments().end()) { + if (S.valno == I->valno) { + if (I->start <= End) { + I = extendSegmentStartTo(I, Start); + + // If S is a complete superset of a segment, we may need to grow its + // endpoint as well. + if (End > I->end) + extendSegmentEndTo(I, End); + return I; + } + } else { + // Check to make sure that we are not overlapping two live segments with + // different valno's. + assert(I->start >= End && + "Cannot overlap two segments with differing ValID's"); + } + } + + // Otherwise, this is just a new segment that doesn't interact with + // anything. + // Insert it. + return segments().insert(I, S); + } + +private: + ImplT &impl() { return *static_cast<ImplT *>(this); } + + CollectionT &segments() { return impl().segmentsColl(); } + + Segment *segmentAt(iterator I) { return const_cast<Segment *>(&(*I)); } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment vector. +//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilVector; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilVector, LiveRange::iterator, + LiveRange::Segments> CalcLiveRangeUtilVectorBase; + +class CalcLiveRangeUtilVector : public CalcLiveRangeUtilVectorBase { +public: + CalcLiveRangeUtilVector(LiveRange *LR) : CalcLiveRangeUtilVectorBase(LR) {} + +private: + friend CalcLiveRangeUtilVectorBase; + + LiveRange::Segments &segmentsColl() { return LR->segments; } + + void insertAtEnd(const Segment &S) { LR->segments.push_back(S); } + + iterator find(SlotIndex Pos) { return LR->find(Pos); } + + iterator findInsertPos(Segment S) { + return std::upper_bound(LR->begin(), LR->end(), S.start); + } +}; + +//===----------------------------------------------------------------------===// +// Instantiation of the methods for calculation of live ranges +// based on a segment set. 
+//===----------------------------------------------------------------------===// + +class CalcLiveRangeUtilSet; +typedef CalcLiveRangeUtilBase<CalcLiveRangeUtilSet, + LiveRange::SegmentSet::iterator, + LiveRange::SegmentSet> CalcLiveRangeUtilSetBase; + +class CalcLiveRangeUtilSet : public CalcLiveRangeUtilSetBase { +public: + CalcLiveRangeUtilSet(LiveRange *LR) : CalcLiveRangeUtilSetBase(LR) {} + +private: + friend CalcLiveRangeUtilSetBase; + + LiveRange::SegmentSet &segmentsColl() { return *LR->segmentSet; } + + void insertAtEnd(const Segment &S) { + LR->segmentSet->insert(LR->segmentSet->end(), S); + } + + iterator find(SlotIndex Pos) { + iterator I = + LR->segmentSet->upper_bound(Segment(Pos, Pos.getNextSlot(), nullptr)); + if (I == LR->segmentSet->begin()) + return I; + iterator PrevI = std::prev(I); + if (Pos < (*PrevI).end) + return PrevI; + return I; + } + + iterator findInsertPos(Segment S) { + iterator I = LR->segmentSet->upper_bound(S); + if (I != LR->segmentSet->end() && !(S.start < *I)) + ++I; + return I; + } +}; +} // namespace + +//===----------------------------------------------------------------------===// +// LiveRange methods +//===----------------------------------------------------------------------===// + +LiveRange::iterator LiveRange::find(SlotIndex Pos) { + // This algorithm is basically std::upper_bound. + // Unfortunately, std::upper_bound cannot be used with mixed types until we + // adopt C++0x. Many libraries can do it, but not all. + if (empty() || Pos >= endIndex()) + return end(); + iterator I = begin(); + size_t Len = size(); + do { + size_t Mid = Len >> 1; + if (Pos < I[Mid].end) + Len = Mid; + else + I += Mid + 1, Len -= Mid + 1; + } while (Len); + return I; +} + +VNInfo *LiveRange::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator); +} + +// overlaps - Return true if the intersection of the two live ranges is +// not empty. +// +// An example for overlaps(): +// +// 0: A = ... +// 4: B = ... +// 8: C = A + B ;; last use of A +// +// The live ranges should look like: +// +// A = [3, 11) +// B = [7, x) +// C = [11, y) +// +// A->overlaps(C) should return false since we want to be able to join +// A and C. 
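+//
+// By the same token, A->overlaps(B) returns true, because [3, 11) and [7, x)
+// intersect.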
+// +bool LiveRange::overlapsFrom(const LiveRange& other, + const_iterator StartPos) const { + assert(!empty() && "empty range"); + const_iterator i = begin(); + const_iterator ie = end(); + const_iterator j = StartPos; + const_iterator je = other.end(); + + assert((StartPos->start <= i->start || StartPos == other.begin()) && + StartPos != other.end() && "Bogus start position hint!"); + + if (i->start < j->start) { + i = std::upper_bound(i, ie, j->start); + if (i != begin()) --i; + } else if (j->start < i->start) { + ++StartPos; + if (StartPos != other.end() && StartPos->start <= i->start) { + assert(StartPos < other.end() && i < end()); + j = std::upper_bound(j, je, i->start); + if (j != other.begin()) --j; + } + } else { + return true; + } + + if (j == je) return false; + + while (i != ie) { + if (i->start > j->start) { + std::swap(i, j); + std::swap(ie, je); + } + + if (i->end > j->start) + return true; + ++i; + } + + return false; +} + +bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty range"); + if (Other.empty()) + return false; + + // Use binary searches to find initial positions. + const_iterator I = find(Other.beginIndex()); + const_iterator IE = end(); + if (I == IE) + return false; + const_iterator J = Other.find(I->start); + const_iterator JE = Other.end(); + if (J == JE) + return false; + + for (;;) { + // J has just been advanced to satisfy: + assert(J->end >= I->start); + // Check for an overlap. + if (J->start < I->end) { + // I and J are overlapping. Find the later start. + SlotIndex Def = std::max(I->start, J->start); + // Allow the overlap if Def is a coalescable copy. + if (Def.isBlock() || + !CP.isCoalescable(Indexes.getInstructionFromIndex(Def))) + return true; + } + // Advance the iterator that ends first to check for more overlaps. + if (J->end > I->end) { + std::swap(I, J); + std::swap(IE, JE); + } + // Advance J until J->end >= I->start. + do + if (++J == JE) + return false; + while (J->end < I->start); + } +} + +/// overlaps - Return true if the live range overlaps an interval specified +/// by [Start, End). +bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { + assert(Start < End && "Invalid range"); + const_iterator I = std::lower_bound(begin(), end(), End); + return I != begin() && (--I)->end > Start; +} + +bool LiveRange::covers(const LiveRange &Other) const { + if (empty()) + return Other.empty(); + + const_iterator I = begin(); + for (const Segment &O : Other.segments) { + I = advanceTo(I, O.start); + if (I == end() || I->start > O.start) + return false; + + // Check adjacent live segments and see if we can get behind O.end. + while (I->end < O.end) { + const_iterator Last = I; + // Get next segment and abort if it was not adjacent. + ++I; + if (I == end() || Last->end != I->start) + return false; + } + } + return true; +} + +/// ValNo is dead, remove it. If it is the largest value number, just nuke it +/// (and any other deleted values neighboring it), otherwise mark it as ~1U so +/// it can be nuked later. +void LiveRange::markValNoForDeletion(VNInfo *ValNo) { + if (ValNo->id == getNumValNums()-1) { + do { + valnos.pop_back(); + } while (!valnos.empty() && valnos.back()->isUnused()); + } else { + ValNo->markUnused(); + } +} + +/// RenumberValues - Renumber all values in order of appearance and delete the +/// remaining unused values. 
+void LiveRange::RenumberValues() { + SmallPtrSet<VNInfo*, 8> Seen; + valnos.clear(); + for (const Segment &S : segments) { + VNInfo *VNI = S.valno; + if (!Seen.insert(VNI).second) + continue; + assert(!VNI->isUnused() && "Unused valno used by live segment"); + VNI->id = (unsigned)valnos.size(); + valnos.push_back(VNI); + } +} + +void LiveRange::addSegmentToSet(Segment S) { + CalcLiveRangeUtilSet(this).addSegment(S); +} + +LiveRange::iterator LiveRange::addSegment(Segment S) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) { + addSegmentToSet(S); + return end(); + } + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).addSegment(S); +} + +void LiveRange::append(const Segment S) { + // Check that the segment belongs to the back of the list. + assert(segments.empty() || segments.back().end <= S.start); + segments.push_back(S); +} + +/// extendInBlock - If this range is live before Kill in the basic +/// block that starts at StartIdx, extend it to be live up to Kill and return +/// the value. If there is no live range before Kill, return NULL. +VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { + // Use the segment set, if it is available. + if (segmentSet != nullptr) + return CalcLiveRangeUtilSet(this).extendInBlock(StartIdx, Kill); + // Otherwise use the segment vector. + return CalcLiveRangeUtilVector(this).extendInBlock(StartIdx, Kill); +} + +/// Remove the specified segment from this range. Note that the segment must +/// be in a single Segment in its entirety. +void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the Segment containing this span. + iterator I = find(Start); + assert(I != end() && "Segment is not in range!"); + assert(I->containsInterval(Start, End) + && "Segment is not entirely in range!"); + + // If the span we are removing is at the start of the Segment, adjust it. + VNInfo *ValNo = I->valno; + if (I->start == Start) { + if (I->end == End) { + if (RemoveDeadValNo) { + // Check if val# is dead. + bool isDead = true; + for (const_iterator II = begin(), EE = end(); II != EE; ++II) + if (II != I && II->valno == ValNo) { + isDead = false; + break; + } + if (isDead) { + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); + } + } + + segments.erase(I); // Removed the whole Segment. + } else + I->start = End; + return; + } + + // Otherwise if the span we are removing is at the end of the Segment, + // adjust the other way. + if (I->end == End) { + I->end = Start; + return; + } + + // Otherwise, we are splitting the Segment into two pieces. + SlotIndex OldEnd = I->end; + I->end = Start; // Trim the old segment. + + // Insert the new one. + segments.insert(std::next(I), Segment(End, OldEnd, ValNo)); +} + +/// removeValNo - Remove all the segments defined by the specified value#. +/// Also remove the value# from value# list. +void LiveRange::removeValNo(VNInfo *ValNo) { + if (empty()) return; + segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) { + return S.valno == ValNo; + }), end()); + // Now that ValNo is dead, remove it. + markValNoForDeletion(ValNo); +} + +void LiveRange::join(LiveRange &Other, + const int *LHSValNoAssignments, + const int *RHSValNoAssignments, + SmallVectorImpl<VNInfo *> &NewVNInfo) { + verify(); + + // Determine if any of our values are mapped. This is uncommon, so we want + // to avoid the range scan if not. 
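+  // As an illustration, LHSValNoAssignments = {0, 0} (hypothetical) maps both
+  // of our value numbers onto joined value #0, so neighbouring segments such
+  // as [0,4:0)[4,8:1) have to be rewritten, and possibly merged, below.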
+ bool MustMapCurValNos = false; + unsigned NumVals = getNumValNums(); + unsigned NumNewVals = NewVNInfo.size(); + for (unsigned i = 0; i != NumVals; ++i) { + unsigned LHSValID = LHSValNoAssignments[i]; + if (i != LHSValID || + (NewVNInfo[LHSValID] && NewVNInfo[LHSValID] != getValNumInfo(i))) { + MustMapCurValNos = true; + break; + } + } + + // If we have to apply a mapping to our base range assignment, rewrite it now. + if (MustMapCurValNos && !empty()) { + // Map the first live range. + + iterator OutIt = begin(); + OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; + for (iterator I = std::next(OutIt), E = end(); I != E; ++I) { + VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; + assert(nextValNo && "Huh?"); + + // If this live range has the same value # as its immediate predecessor, + // and if they are neighbors, remove one Segment. This happens when we + // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. + if (OutIt->valno == nextValNo && OutIt->end == I->start) { + OutIt->end = I->end; + } else { + // Didn't merge. Move OutIt to the next segment, + ++OutIt; + OutIt->valno = nextValNo; + if (OutIt != I) { + OutIt->start = I->start; + OutIt->end = I->end; + } + } + } + // If we merge some segments, chop off the end. + ++OutIt; + segments.erase(OutIt, end()); + } + + // Rewrite Other values before changing the VNInfo ids. + // This can leave Other in an invalid state because we're not coalescing + // touching segments that now have identical values. That's OK since Other is + // not supposed to be valid after calling join(); + for (Segment &S : Other.segments) + S.valno = NewVNInfo[RHSValNoAssignments[S.valno->id]]; + + // Update val# info. Renumber them and make sure they all belong to this + // LiveRange now. Also remove dead val#'s. + unsigned NumValNos = 0; + for (unsigned i = 0; i < NumNewVals; ++i) { + VNInfo *VNI = NewVNInfo[i]; + if (VNI) { + if (NumValNos >= NumVals) + valnos.push_back(VNI); + else + valnos[NumValNos] = VNI; + VNI->id = NumValNos++; // Renumber val#. + } + } + if (NumNewVals < NumVals) + valnos.resize(NumNewVals); // shrinkify + + // Okay, now insert the RHS live segments into the LHS. + LiveRangeUpdater Updater(this); + for (Segment &S : Other.segments) + Updater.add(S); +} + +/// Merge all of the segments in RHS into this live range as the specified +/// value number. The segments in RHS are allowed to overlap with segments in +/// the current range, but only if the overlapping segments have the +/// specified value number. +void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS, + VNInfo *LHSValNo) { + LiveRangeUpdater Updater(this); + for (const Segment &S : RHS.segments) + Updater.add(S.start, S.end, LHSValNo); +} + +/// MergeValueInAsValue - Merge all of the live segments of a specific val# +/// in RHS into this live range as the specified value number. +/// The segments in RHS are allowed to overlap with segments in the +/// current range, it will replace the value numbers of the overlaped +/// segments with the specified value number. +void LiveRange::MergeValueInAsValue(const LiveRange &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { + LiveRangeUpdater Updater(this); + for (const Segment &S : RHS.segments) + if (S.valno == RHSValNo) + Updater.add(S.start, S.end, LHSValNo); +} + +/// MergeValueNumberInto - This method is called when two value nubmers +/// are found to be equivalent. This eliminates V1, replacing all +/// segments with the V1 value number with the V2 value number. 
This can +/// cause merging of V1/V2 values numbers and compaction of the value space. +VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { + assert(V1 != V2 && "Identical value#'s are always equivalent!"); + + // This code actually merges the (numerically) larger value number into the + // smaller value number, which is likely to allow us to compactify the value + // space. The only thing we have to be careful of is to preserve the + // instruction that defines the result value. + + // Make sure V2 is smaller than V1. + if (V1->id < V2->id) { + V1->copyFrom(*V2); + std::swap(V1, V2); + } + + // Merge V1 segments into V2. + for (iterator I = begin(); I != end(); ) { + iterator S = I++; + if (S->valno != V1) continue; // Not a V1 Segment. + + // Okay, we found a V1 live range. If it had a previous, touching, V2 live + // range, extend it. + if (S != begin()) { + iterator Prev = S-1; + if (Prev->valno == V2 && Prev->end == S->start) { + Prev->end = S->end; + + // Erase this live-range. + segments.erase(S); + I = Prev+1; + S = Prev; + } + } + + // Okay, now we have a V1 or V2 live range that is maximally merged forward. + // Ensure that it is a V2 live-range. + S->valno = V2; + + // If we can merge it into later V2 segments, do so now. We ignore any + // following V1 segments, as they will be merged in subsequent iterations + // of the loop. + if (I != end()) { + if (I->start == S->end && I->valno == V2) { + S->end = I->end; + segments.erase(I); + I = S+1; + } + } + } + + // Now that V1 is dead, remove it. + markValNoForDeletion(V1); + + return V2; +} + +void LiveRange::flushSegmentSet() { + assert(segmentSet != nullptr && "segment set must have been created"); + assert( + segments.empty() && + "segment set can be used only initially before switching to the array"); + segments.append(segmentSet->begin(), segmentSet->end()); + segmentSet = nullptr; + verify(); +} + +void LiveInterval::freeSubRange(SubRange *S) { + S->~SubRange(); + // Memory was allocated with BumpPtr allocator and is not freed here. +} + +void LiveInterval::removeEmptySubRanges() { + SubRange **NextPtr = &SubRanges; + SubRange *I = *NextPtr; + while (I != nullptr) { + if (!I->empty()) { + NextPtr = &I->Next; + I = *NextPtr; + continue; + } + // Skip empty subranges until we find the first nonempty one. + do { + SubRange *Next = I->Next; + freeSubRange(I); + I = Next; + } while (I != nullptr && I->empty()); + *NextPtr = I; + } +} + +void LiveInterval::clearSubRanges() { + for (SubRange *I = SubRanges, *Next; I != nullptr; I = Next) { + Next = I->Next; + freeSubRange(I); + } + SubRanges = nullptr; +} + +/// Helper function for constructMainRangeFromSubranges(): Search the CFG +/// backwards until we find a place covered by a LiveRange segment that actually +/// has a valno set. +static VNInfo *searchForVNI(const SlotIndexes &Indexes, LiveRange &LR, + const MachineBasicBlock *MBB, + SmallPtrSetImpl<const MachineBasicBlock*> &Visited) { + // We start the search at the end of MBB. + SlotIndex EndIdx = Indexes.getMBBEndIdx(MBB); + // In our use case we can't live the area covered by the live segments without + // finding an actual VNI def. + LiveRange::iterator I = LR.find(EndIdx.getPrevSlot()); + assert(I != LR.end()); + LiveRange::Segment &S = *I; + if (S.valno != nullptr) + return S.valno; + + VNInfo *VNI = nullptr; + // Continue at predecessors (we could even go to idom with domtree available). + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + // Avoid going in circles. 
+    if (!Visited.insert(Pred).second)
+      continue;
+
+    VNI = searchForVNI(Indexes, LR, Pred, Visited);
+    if (VNI != nullptr) {
+      S.valno = VNI;
+      break;
+    }
+  }
+
+  return VNI;
+}
+
+static void determineMissingVNIs(const SlotIndexes &Indexes, LiveInterval &LI) {
+  SmallPtrSet<const MachineBasicBlock*, 5> Visited;
+
+  LiveRange::iterator OutIt;
+  VNInfo *PrevValNo = nullptr;
+  for (LiveRange::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
+    LiveRange::Segment &S = *I;
+    // Determine final VNI if necessary.
+    if (S.valno == nullptr) {
+      // This can only happen at the beginning of a basic block.
+      assert(S.start.isBlock() && "valno should only be missing at block begin");
+
+      Visited.clear();
+      const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(S.start);
+      for (const MachineBasicBlock *Pred : MBB->predecessors()) {
+        VNInfo *VNI = searchForVNI(Indexes, LI, Pred, Visited);
+        if (VNI != nullptr) {
+          S.valno = VNI;
+          break;
+        }
+      }
+      assert(S.valno != nullptr && "could not determine valno");
+    }
+    // Merge with previous segment if it has the same VNI.
+    if (PrevValNo == S.valno && OutIt->end == S.start) {
+      OutIt->end = S.end;
+    } else {
+      // Didn't merge. Move OutIt to next segment.
+      if (PrevValNo == nullptr)
+        OutIt = LI.begin();
+      else
+        ++OutIt;
+
+      if (OutIt != I)
+        *OutIt = *I;
+      PrevValNo = S.valno;
+    }
+  }
+  // If we merged some segments, chop off the end.
+  ++OutIt;
+  LI.segments.erase(OutIt, LI.end());
+}
+
+void LiveInterval::constructMainRangeFromSubranges(
+    const SlotIndexes &Indexes, VNInfo::Allocator &VNIAllocator) {
+  // The basic observations on which this algorithm is based:
+  // - Each Def/ValNo in a subrange must have a corresponding def on the main
+  //   range, but no further defs/valnos are necessary.
+  // - If any of the subranges is live at a point the main live range has to be
+  //   live too, conversely if no subrange is live the main range mustn't be
+  //   live either.
+  // We do this by scanning through all the subranges simultaneously, creating
+  // new segments in the main range as segment starts/ends come up in the
+  // subranges.
+  assert(hasSubRanges() && "expected subranges to be present");
+  assert(segments.empty() && valnos.empty() && "expected empty main range");
+
+  // Collect subrange, iterator pairs for the walk and determine first and last
+  // SlotIndex involved.
+  SmallVector<std::pair<const SubRange*, const_iterator>, 4> SRs;
+  SlotIndex First;
+  SlotIndex Last;
+  for (const SubRange &SR : subranges()) {
+    if (SR.empty())
+      continue;
+    SRs.push_back(std::make_pair(&SR, SR.begin()));
+    if (!First.isValid() || SR.segments.front().start < First)
+      First = SR.segments.front().start;
+    if (!Last.isValid() || SR.segments.back().end > Last)
+      Last = SR.segments.back().end;
+  }
+
+  // Walk over all subranges simultaneously.
+  Segment CurrentSegment;
+  bool ConstructingSegment = false;
+  bool NeedVNIFixup = false;
+  LaneBitmask ActiveMask = 0;
+  SlotIndex Pos = First;
+  while (true) {
+    SlotIndex NextPos = Last;
+    enum {
+      NOTHING,
+      BEGIN_SEGMENT,
+      END_SEGMENT,
+    } Event = NOTHING;
+    // Which subregister lanes are affected by the current event.
+    LaneBitmask EventMask = 0;
+    // Whether a BEGIN_SEGMENT is also a valno definition point.
+    bool IsDef = false;
+    // Find the next begin or end of a subrange segment. Combine masks if we
+    // have multiple begins/ends at the same position. Ends take precedence over
+    // Begins.
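+    // As a concrete (hypothetical) illustration: two subranges covering [0,32)
+    // and [0,48), both defined at 0, produce the events BEGIN@0, END@32 and
+    // END@48, and a single main-range segment [0,48) results because some lane
+    // stays active until 48.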
+ for (auto &SRP : SRs) { + const SubRange &SR = *SRP.first; + const_iterator &I = SRP.second; + // Advance iterator of subrange to a segment involving Pos; the earlier + // segments are already merged at this point. + while (I != SR.end() && + (I->end < Pos || + (I->end == Pos && (ActiveMask & SR.LaneMask) == 0))) + ++I; + if (I == SR.end()) + continue; + if ((ActiveMask & SR.LaneMask) == 0 && + Pos <= I->start && I->start <= NextPos) { + // Merge multiple begins at the same position. + if (I->start == NextPos && Event == BEGIN_SEGMENT) { + EventMask |= SR.LaneMask; + IsDef |= I->valno->def == I->start; + } else if (I->start < NextPos || Event != END_SEGMENT) { + Event = BEGIN_SEGMENT; + NextPos = I->start; + EventMask = SR.LaneMask; + IsDef = I->valno->def == I->start; + } + } + if ((ActiveMask & SR.LaneMask) != 0 && + Pos <= I->end && I->end <= NextPos) { + // Merge multiple ends at the same position. + if (I->end == NextPos && Event == END_SEGMENT) + EventMask |= SR.LaneMask; + else { + Event = END_SEGMENT; + NextPos = I->end; + EventMask = SR.LaneMask; + } + } + } + + // Advance scan position. + Pos = NextPos; + if (Event == BEGIN_SEGMENT) { + if (ConstructingSegment && IsDef) { + // Finish previous segment because we have to start a new one. + CurrentSegment.end = Pos; + append(CurrentSegment); + ConstructingSegment = false; + } + + // Start a new segment if necessary. + if (!ConstructingSegment) { + // Determine value number for the segment. + VNInfo *VNI; + if (IsDef) { + VNI = getNextValue(Pos, VNIAllocator); + } else { + // We have to reuse an existing value number, if we are lucky + // then we already passed one of the predecessor blocks and determined + // its value number (with blocks in reverse postorder this would be + // always true but we have no such guarantee). + assert(Pos.isBlock()); + const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Pos); + // See if any of the predecessor blocks has a lower number and a VNI + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + SlotIndex PredEnd = Indexes.getMBBEndIdx(Pred); + VNI = getVNInfoBefore(PredEnd); + if (VNI != nullptr) + break; + } + // Def will come later: We have to do an extra fixup pass. + if (VNI == nullptr) + NeedVNIFixup = true; + } + + // In rare cases we can produce adjacent segments with the same value + // number (if they come from different subranges, but happen to have + // the same defining instruction). VNIFixup will fix those cases. + if (!empty() && segments.back().end == Pos && + segments.back().valno == VNI) + NeedVNIFixup = true; + CurrentSegment.start = Pos; + CurrentSegment.valno = VNI; + ConstructingSegment = true; + } + ActiveMask |= EventMask; + } else if (Event == END_SEGMENT) { + assert(ConstructingSegment); + // Finish segment if no lane is active anymore. + ActiveMask &= ~EventMask; + if (ActiveMask == 0) { + CurrentSegment.end = Pos; + append(CurrentSegment); + ConstructingSegment = false; + } + } else { + // We reached the end of the last subranges and can stop. + assert(Event == NOTHING); + break; + } + } + + // We might not be able to assign new valnos for all segments if the basic + // block containing the definition comes after a segment using the valno. + // Do a fixup pass for this uncommon case. 
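// The sweep above opens a main-range segment when the first subrange lane
// becomes live and closes it once the active lane mask drops back to zero.
// A simplified standalone sketch of that idea as a plain interval union:
// no value numbers, and an open/close counter stands in for the lane mask
// (names are illustrative only).
#include <algorithm>
#include <utility>
#include <vector>

namespace sketch {
using Interval = std::pair<int, int>;            // [first, second)

// Union of possibly overlapping intervals collected from several subranges.
inline std::vector<Interval> unionIntervals(const std::vector<Interval> &All) {
  std::vector<std::pair<int, int>> Events;       // (position, +1 open / -1 close)
  for (const Interval &I : All) {
    Events.push_back({I.first, +1});
    Events.push_back({I.second, -1});
  }
  // Sort by position; at equal positions handle opens before closes so that
  // touching intervals fuse into one segment.
  std::sort(Events.begin(), Events.end(),
            [](const std::pair<int, int> &A, const std::pair<int, int> &B) {
              return A.first != B.first ? A.first < B.first
                                        : A.second > B.second;
            });
  std::vector<Interval> Out;
  int Active = 0, Start = 0;
  for (const auto &E : Events) {
    if (Active == 0 && E.second > 0)
      Start = E.first;                           // first "lane" became live.
    Active += E.second;
    if (Active == 0)
      Out.push_back({Start, E.first});           // nothing live any more: close.
  }
  return Out;
}
} // namespace sketch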
+ if (NeedVNIFixup) + determineMissingVNIs(Indexes, *this); + + assert(ActiveMask == 0 && !ConstructingSegment && "all segments ended"); + verify(); +} + +unsigned LiveInterval::getSize() const { + unsigned Sum = 0; + for (const Segment &S : segments) + Sum += S.start.distance(S.end); + return Sum; +} + +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::Segment::dump() const { + dbgs() << *this << "\n"; +} +#endif + +void LiveRange::print(raw_ostream &OS) const { + if (empty()) + OS << "EMPTY"; + else { + for (const Segment &S : segments) { + OS << S; + assert(S.valno == getValNumInfo(S.valno->id) && "Bad VNInfo"); + } + } + + // Print value number info. + if (getNumValNums()) { + OS << " "; + unsigned vnum = 0; + for (const_vni_iterator i = vni_begin(), e = vni_end(); i != e; + ++i, ++vnum) { + const VNInfo *vni = *i; + if (vnum) OS << " "; + OS << vnum << "@"; + if (vni->isUnused()) { + OS << "x"; + } else { + OS << vni->def; + if (vni->isPHIDef()) + OS << "-phi"; + } + } + } +} + +void LiveInterval::print(raw_ostream &OS) const { + OS << PrintReg(reg) << ' '; + super::print(OS); + // Print subranges + for (const SubRange &SR : subranges()) { + OS << " L" << PrintLaneMask(SR.LaneMask) << ' ' << SR; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} + +void LiveInterval::dump() const { + dbgs() << *this << "\n"; +} +#endif + +#ifndef NDEBUG +void LiveRange::verify() const { + for (const_iterator I = begin(), E = end(); I != E; ++I) { + assert(I->start.isValid()); + assert(I->end.isValid()); + assert(I->start < I->end); + assert(I->valno != nullptr); + assert(I->valno->id < valnos.size()); + assert(I->valno == valnos[I->valno->id]); + if (std::next(I) != E) { + assert(I->end <= std::next(I)->start); + if (I->end == std::next(I)->start) + assert(I->valno != std::next(I)->valno); + } + } +} + +void LiveInterval::verify(const MachineRegisterInfo *MRI) const { + super::verify(); + + // Make sure SubRanges are fine and LaneMasks are disjunct. + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u; + for (const SubRange &SR : subranges()) { + // Subrange lanemask should be disjunct to any previous subrange masks. + assert((Mask & SR.LaneMask) == 0); + Mask |= SR.LaneMask; + + // subrange mask should not contained in maximum lane mask for the vreg. + assert((Mask & ~MaxMask) == 0); + // empty subranges must be removed. + assert(!SR.empty()); + + SR.verify(); + // Main liverange should cover subrange. + assert(covers(SR)); + } +} +#endif + + +//===----------------------------------------------------------------------===// +// LiveRangeUpdater class +//===----------------------------------------------------------------------===// +// +// The LiveRangeUpdater class always maintains these invariants: +// +// - When LastStart is invalid, Spills is empty and the iterators are invalid. +// This is the initial state, and the state created by flush(). +// In this state, isDirty() returns false. +// +// Otherwise, segments are kept in three separate areas: +// +// 1. [begin; WriteI) at the front of LR. +// 2. [ReadI; end) at the back of LR. +// 3. Spills. +// +// - LR.begin() <= WriteI <= ReadI <= LR.end(). +// - Segments in all three areas are fully ordered and coalesced. 
+// - Segments in area 1 precede and can't coalesce with segments in area 2. +// - Segments in Spills precede and can't coalesce with segments in area 2. +// - No coalescing is possible between segments in Spills and segments in area +// 1, and there are no overlapping segments. +// +// The segments in Spills are not ordered with respect to the segments in area +// 1. They need to be merged. +// +// When they exist, Spills.back().start <= LastStart, +// and WriteI[-1].start <= LastStart. + +void LiveRangeUpdater::print(raw_ostream &OS) const { + if (!isDirty()) { + if (LR) + OS << "Clean updater: " << *LR << '\n'; + else + OS << "Null updater.\n"; + return; + } + assert(LR && "Can't have null LR in dirty updater."); + OS << " updater with gap = " << (ReadI - WriteI) + << ", last start = " << LastStart + << ":\n Area 1:"; + for (const auto &S : make_range(LR->begin(), WriteI)) + OS << ' ' << S; + OS << "\n Spills:"; + for (unsigned I = 0, E = Spills.size(); I != E; ++I) + OS << ' ' << Spills[I]; + OS << "\n Area 2:"; + for (const auto &S : make_range(ReadI, LR->end())) + OS << ' ' << S; + OS << '\n'; +} + +void LiveRangeUpdater::dump() const +{ + print(errs()); +} + +// Determine if A and B should be coalesced. +static inline bool coalescable(const LiveRange::Segment &A, + const LiveRange::Segment &B) { + assert(A.start <= B.start && "Unordered live segments."); + if (A.end == B.start) + return A.valno == B.valno; + if (A.end < B.start) + return false; + assert(A.valno == B.valno && "Cannot overlap different values"); + return true; +} + +void LiveRangeUpdater::add(LiveRange::Segment Seg) { + assert(LR && "Cannot add to a null destination"); + + // Fall back to the regular add method if the live range + // is using the segment set instead of the segment vector. + if (LR->segmentSet != nullptr) { + LR->addSegmentToSet(Seg); + return; + } + + // Flush the state if Start moves backwards. + if (!LastStart.isValid() || LastStart > Seg.start) { + if (isDirty()) + flush(); + // This brings us to an uninitialized state. Reinitialize. + assert(Spills.empty() && "Leftover spilled segments"); + WriteI = ReadI = LR->begin(); + } + + // Remember start for next time. + LastStart = Seg.start; + + // Advance ReadI until it ends after Seg.start. + LiveRange::iterator E = LR->end(); + if (ReadI != E && ReadI->end <= Seg.start) { + // First try to close the gap between WriteI and ReadI with spills. + if (ReadI != WriteI) + mergeSpills(); + // Then advance ReadI. + if (ReadI == WriteI) + ReadI = WriteI = LR->find(Seg.start); + else + while (ReadI != E && ReadI->end <= Seg.start) + *WriteI++ = *ReadI++; + } + + assert(ReadI == E || ReadI->end > Seg.start); + + // Check if the ReadI segment begins early. + if (ReadI != E && ReadI->start <= Seg.start) { + assert(ReadI->valno == Seg.valno && "Cannot overlap different values"); + // Bail if Seg is completely contained in ReadI. + if (ReadI->end >= Seg.end) + return; + // Coalesce into Seg. + Seg.start = ReadI->start; + ++ReadI; + } + + // Coalesce as much as possible from ReadI into Seg. + while (ReadI != E && coalescable(Seg, *ReadI)) { + Seg.end = std::max(Seg.end, ReadI->end); + ++ReadI; + } + + // Try coalescing Spills.back() into Seg. + if (!Spills.empty() && coalescable(Spills.back(), Seg)) { + Seg.start = Spills.back().start; + Seg.end = std::max(Spills.back().end, Seg.end); + Spills.pop_back(); + } + + // Try coalescing Seg into WriteI[-1]. 
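// The add() path above folds each incoming segment into the range by growing
// the previous segment whenever the two touch with the same value number or
// overlap, mirroring the coalescable() predicate. A minimal standalone sketch
// of that back-coalescing append (Segment and addCoalescing are assumed
// names, not the updater's API).
#include <algorithm>
#include <vector>

namespace sketch {
struct Segment { int Start, End, ValNo; };       // sorted by Start, coalesced.

inline void addCoalescing(std::vector<Segment> &Segs, Segment Seg) {
  if (!Segs.empty()) {
    Segment &Back = Segs.back();
    bool Touching = Back.End == Seg.Start && Back.ValNo == Seg.ValNo;
    bool Overlapping = Back.End > Seg.Start;     // overlap implies the same value.
    if (Touching || Overlapping) {
      Back.End = std::max(Back.End, Seg.End);    // grow instead of inserting.
      return;
    }
  }
  Segs.push_back(Seg);
}
} // namespace sketch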
+ if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) { + WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); + return; + } + + // Seg doesn't coalesce with anything, and needs to be inserted somewhere. + if (WriteI != ReadI) { + *WriteI++ = Seg; + return; + } + + // Finally, append to LR or Spills. + if (WriteI == E) { + LR->segments.push_back(Seg); + WriteI = ReadI = LR->end(); + } else + Spills.push_back(Seg); +} + +// Merge as many spilled segments as possible into the gap between WriteI +// and ReadI. Advance WriteI to reflect the inserted instructions. +void LiveRangeUpdater::mergeSpills() { + // Perform a backwards merge of Spills and [SpillI;WriteI). + size_t GapSize = ReadI - WriteI; + size_t NumMoved = std::min(Spills.size(), GapSize); + LiveRange::iterator Src = WriteI; + LiveRange::iterator Dst = Src + NumMoved; + LiveRange::iterator SpillSrc = Spills.end(); + LiveRange::iterator B = LR->begin(); + + // This is the new WriteI position after merging spills. + WriteI = Dst; + + // Now merge Src and Spills backwards. + while (Src != Dst) { + if (Src != B && Src[-1].start > SpillSrc[-1].start) + *--Dst = *--Src; + else + *--Dst = *--SpillSrc; + } + assert(NumMoved == size_t(Spills.end() - SpillSrc)); + Spills.erase(SpillSrc, Spills.end()); +} + +void LiveRangeUpdater::flush() { + if (!isDirty()) + return; + // Clear the dirty state. + LastStart = SlotIndex(); + + assert(LR && "Cannot add to a null destination"); + + // Nothing to merge? + if (Spills.empty()) { + LR->segments.erase(WriteI, ReadI); + LR->verify(); + return; + } + + // Resize the WriteI - ReadI gap to match Spills. + size_t GapSize = ReadI - WriteI; + if (GapSize < Spills.size()) { + // The gap is too small. Make some room. + size_t WritePos = WriteI - LR->begin(); + LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment()); + // This also invalidated ReadI, but it is recomputed below. + WriteI = LR->begin() + WritePos; + } else { + // Shrink the gap if necessary. + LR->segments.erase(WriteI + Spills.size(), ReadI); + } + ReadI = WriteI + Spills.size(); + mergeSpills(); + LR->verify(); +} + +unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { + // Create initial equivalence classes. + EqClass.clear(); + EqClass.grow(LI->getNumValNums()); + + const VNInfo *used = nullptr, *unused = nullptr; + + // Determine connections. + for (const VNInfo *VNI : LI->valnos) { + // Group all unused values into one class. + if (VNI->isUnused()) { + if (unused) + EqClass.join(unused->id, VNI->id); + unused = VNI; + continue; + } + used = VNI; + if (VNI->isPHIDef()) { + const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + assert(MBB && "Phi-def has no defining MBB"); + // Connect to values live out of predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) + EqClass.join(VNI->id, PVNI->id); + } else { + // Normal value defined by an instruction. Check for two-addr redef. + // FIXME: This could be coincidental. Should we really check for a tied + // operand constraint? + // Note that VNI->def may be a use slot for an early clobber def. + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) + EqClass.join(VNI->id, UVNI->id); + } + } + + // Lump all the unused values in with the last used value. 
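// mergeSpills() above merges two already-sorted runs of segments backwards
// into the gap between WriteI and ReadI, like the tail of a merge sort, so no
// temporary buffer is needed. A standalone sketch of that backward merge over
// plain ints (mergeBackward is an assumed name).
#include <cstddef>
#include <vector>

namespace sketch {
// A has size ALen + B.size(); A[0, ALen) is sorted and the rest is free
// space. B is sorted. Afterwards A holds the sorted merge of both inputs.
inline void mergeBackward(std::vector<int> &A, std::size_t ALen,
                          const std::vector<int> &B) {
  std::size_t Dst = ALen + B.size();
  std::size_t I = ALen, J = B.size();
  while (J > 0) {                                // once B is drained, A's prefix
    if (I > 0 && A[I - 1] > B[J - 1])            // is already in place.
      A[--Dst] = A[--I];
    else
      A[--Dst] = B[--J];
  }
}
} // namespace sketch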
+ if (used && unused) + EqClass.join(used->id, unused->id); + + EqClass.compress(); + return EqClass.getNumClasses(); +} + +template<typename LiveRangeT, typename EqClassesT> +static void DistributeRange(LiveRangeT &LR, LiveRangeT *SplitLRs[], + EqClassesT VNIClasses) { + // Move segments to new intervals. + LiveRange::iterator J = LR.begin(), E = LR.end(); + while (J != E && VNIClasses[J->valno->id] == 0) + ++J; + for (LiveRange::iterator I = J; I != E; ++I) { + if (unsigned eq = VNIClasses[I->valno->id]) { + assert((SplitLRs[eq-1]->empty() || SplitLRs[eq-1]->expiredAt(I->start)) && + "New intervals should be empty"); + SplitLRs[eq-1]->segments.push_back(*I); + } else + *J++ = *I; + } + LR.segments.erase(J, E); + + // Transfer VNInfos to their new owners and renumber them. + unsigned j = 0, e = LR.getNumValNums(); + while (j != e && VNIClasses[j] == 0) + ++j; + for (unsigned i = j; i != e; ++i) { + VNInfo *VNI = LR.getValNumInfo(i); + if (unsigned eq = VNIClasses[i]) { + VNI->id = SplitLRs[eq-1]->getNumValNums(); + SplitLRs[eq-1]->valnos.push_back(VNI); + } else { + VNI->id = j; + LR.valnos[j++] = VNI; + } + } + LR.valnos.resize(j); +} + +void ConnectedVNInfoEqClasses::Distribute(LiveInterval &LI, LiveInterval *LIV[], + MachineRegisterInfo &MRI) { + // Rewrite instructions. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = *RI; + MachineInstr *MI = RI->getParent(); + ++RI; + // DBG_VALUE instructions don't have slot indexes, so get the index of the + // instruction before them. + // Normally, DBG_VALUE instructions are removed before this function is + // called, but it is not a requirement. + SlotIndex Idx; + if (MI->isDebugValue()) + Idx = LIS.getSlotIndexes()->getIndexBefore(MI); + else + Idx = LIS.getInstructionIndex(MI); + LiveQueryResult LRQ = LI.Query(Idx); + const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); + // In the case of an <undef> use that isn't tied to any def, VNI will be + // NULL. If the use is tied to a def, VNI will be the defined value. + if (!VNI) + continue; + if (unsigned EqClass = getEqClass(VNI)) + MO.setReg(LIV[EqClass-1]->reg); + } + + // Distribute subregister liveranges. + if (LI.hasSubRanges()) { + unsigned NumComponents = EqClass.getNumClasses(); + SmallVector<unsigned, 8> VNIMapping; + SmallVector<LiveInterval::SubRange*, 8> SubRanges; + BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator(); + for (LiveInterval::SubRange &SR : LI.subranges()) { + // Create new subranges in the split intervals and construct a mapping + // for the VNInfos in the subrange. + unsigned NumValNos = SR.valnos.size(); + VNIMapping.clear(); + VNIMapping.reserve(NumValNos); + SubRanges.clear(); + SubRanges.resize(NumComponents-1, nullptr); + for (unsigned I = 0; I < NumValNos; ++I) { + const VNInfo &VNI = *SR.valnos[I]; + const VNInfo *MainRangeVNI = LI.getVNInfoAt(VNI.def); + assert(MainRangeVNI != nullptr + && "SubRange def must have corresponding main range def"); + unsigned ComponentNum = getEqClass(MainRangeVNI); + VNIMapping.push_back(ComponentNum); + if (ComponentNum > 0 && SubRanges[ComponentNum-1] == nullptr) { + SubRanges[ComponentNum-1] + = LIV[ComponentNum-1]->createSubRange(Allocator, SR.LaneMask); + } + } + DistributeRange(SR, SubRanges.data(), VNIMapping); + } + LI.removeEmptySubRanges(); + } + + // Distribute main liverange. 
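// Classify() above joins value numbers that are provably connected (a PHI
// with the values live out of its predecessors, a redef with the value it
// reads) and then counts the resulting classes. A tiny standalone union-find
// sketch of that grouping (EqClasses here is an assumed helper, not
// llvm::IntEqClasses).
#include <numeric>
#include <vector>

namespace sketch {
struct EqClasses {
  std::vector<unsigned> Parent;
  explicit EqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u); // each id starts in its own class.
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];         // path halving.
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
  unsigned numClasses() {
    unsigned N = 0;
    for (unsigned I = 0, E = static_cast<unsigned>(Parent.size()); I != E; ++I)
      N += find(I) == I;                         // one root per class.
    return N;
  }
};
} // namespace sketch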
+ DistributeRange(LI, LIV, EqClass); +} diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp new file mode 100644 index 0000000..9451d92 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -0,0 +1,1461 @@ +//===-- LiveIntervalAnalysis.cpp - Live Interval Analysis -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveInterval analysis pass which is used +// by the Linear Scan Register allocator. This pass linearizes the +// basic blocks of the function in DFS order and uses the +// LiveVariables pass to conservatively compute live intervals for +// each virtual and physical register. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "LiveRangeCalc.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cmath> +#include <limits> +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +char LiveIntervals::ID = 0; +char &llvm::LiveIntervalsID = LiveIntervals::ID; +INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveIntervals, "liveintervals", + "Live Interval Analysis", false, false) + +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + +static cl::opt<bool> EnableSubRegLiveness( + "enable-subreg-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable subregister liveness tracking.")); + +namespace llvm { +cl::opt<bool> UseSegmentSetForPhysRegs( + "use-segment-set-for-physregs", cl::Hidden, cl::init(true), + cl::desc( + "Use segment set for the computation of the live ranges of physregs.")); +} + +void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + // LiveVariables isn't really required by this analysis, it is only required + // here to make sure it is live during TwoAddressInstructionPass and + // PHIElimination. This is temporary. 
+ AU.addRequired<LiveVariables>(); + AU.addPreserved<LiveVariables>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addRequiredTransitiveID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addPreserved<SlotIndexes>(); + AU.addRequiredTransitive<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +LiveIntervals::LiveIntervals() : MachineFunctionPass(ID), + DomTree(nullptr), LRCalc(nullptr) { + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); +} + +LiveIntervals::~LiveIntervals() { + delete LRCalc; +} + +void LiveIntervals::releaseMemory() { + // Free the live intervals themselves. + for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i) + delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)]; + VirtRegIntervals.clear(); + RegMaskSlots.clear(); + RegMaskBits.clear(); + RegMaskBlocks.clear(); + + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + delete RegUnitRanges[i]; + RegUnitRanges.clear(); + + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); +} + +/// runOnMachineFunction - calculates LiveIntervals +/// +bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + Indexes = &getAnalysis<SlotIndexes>(); + DomTree = &getAnalysis<MachineDominatorTree>(); + + if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness()) + MRI->enableSubRegLiveness(true); + + if (!LRCalc) + LRCalc = new LiveRangeCalc(); + + // Allocate space for all virtual registers. + VirtRegIntervals.resize(MRI->getNumVirtRegs()); + + computeVirtRegs(); + computeRegMasks(); + computeLiveInRegUnits(); + + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } + DEBUG(dump()); + return true; +} + +/// print - Implement the dump method. +void LiveIntervals::print(raw_ostream &OS, const Module* ) const { + OS << "********** INTERVALS **********\n"; + + // Dump the regunits. + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + if (LiveRange *LR = RegUnitRanges[i]) + OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n'; + + // Dump the virtregs. + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (hasInterval(Reg)) + OS << getInterval(Reg) << '\n'; + } + + OS << "RegMasks:"; + for (unsigned i = 0, e = RegMaskSlots.size(); i != e; ++i) + OS << ' ' << RegMaskSlots[i]; + OS << '\n'; + + printInstrs(OS); +} + +void LiveIntervals::printInstrs(raw_ostream &OS) const { + OS << "********** MACHINEINSTRS **********\n"; + MF->print(OS, Indexes); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveIntervals::dumpInstrs() const { + printInstrs(dbgs()); +} +#endif + +LiveInterval* LiveIntervals::createInterval(unsigned reg) { + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? + llvm::huge_valf : 0.0F; + return new LiveInterval(reg, Weight); +} + + +/// computeVirtRegInterval - Compute the live interval of a virtual register, +/// based on defs and uses. 
+void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { + assert(LRCalc && "LRCalc not initialized."); + assert(LI.empty() && "Should only compute empty intervals."); + bool ShouldTrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(LI.reg); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + LRCalc->calculate(LI, ShouldTrackSubRegLiveness); + bool SeparatedComponents = computeDeadValues(LI, nullptr); + if (SeparatedComponents) { + assert(ShouldTrackSubRegLiveness + && "Separated components should only occur for unused subreg defs"); + SmallVector<LiveInterval*, 8> SplitLIs; + splitSeparateComponents(LI, SplitLIs); + } +} + +void LiveIntervals::computeVirtRegs() { + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + createAndComputeVirtRegInterval(Reg); + } +} + +void LiveIntervals::computeRegMasks() { + RegMaskBlocks.resize(MF->getNumBlockIDs()); + + // Find all instructions with regmask operands. + for (MachineBasicBlock &MBB : *MF) { + std::pair<unsigned, unsigned> &RMB = RegMaskBlocks[MBB.getNumber()]; + RMB.first = RegMaskSlots.size(); + + // Some block starts, such as EH funclets, create masks. + if (const uint32_t *Mask = MBB.getBeginClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBStartIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + + for (MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + RegMaskSlots.push_back(Indexes->getInstructionIndex(&MI).getRegSlot()); + RegMaskBits.push_back(MO.getRegMask()); + } + } + + // Some block ends, such as funclet returns, create masks. + if (const uint32_t *Mask = MBB.getEndClobberMask(TRI)) { + RegMaskSlots.push_back(Indexes->getMBBEndIdx(&MBB)); + RegMaskBits.push_back(Mask); + } + + // Compute the number of register mask instructions in this block. + RMB.second = RegMaskSlots.size() - RMB.first; + } +} + +//===----------------------------------------------------------------------===// +// Register Unit Liveness +//===----------------------------------------------------------------------===// +// +// Fixed interference typically comes from ABI boundaries: Function arguments +// and return values are passed in fixed registers, and so are exception +// pointers entering landing pads. Certain instructions require values to be +// present in specific registers. That is also represented through fixed +// interference. +// + +/// computeRegUnitInterval - Compute the live range of a register unit, based +/// on the uses and defs of aliasing registers. The range should be empty, +/// or contain only dead phi-defs from ABI blocks. +void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + + // The physregs aliasing Unit are the roots and their super-registers. + // Create all values as dead defs before extending to uses. Note that roots + // may share super-registers. That's OK because createDeadDefs() is + // idempotent. It is very rare for a register unit to have multiple roots, so + // uniquing super-registers is probably not worthwhile. 
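// computeRegMasks() above flattens all regmask slots into one array and, for
// each basic block, remembers the index of its first entry plus a count so
// later queries can take a per-block slice. A standalone sketch of that
// (first, count) bookkeeping over plain vectors (names are illustrative).
#include <utility>
#include <vector>

namespace sketch {
// Items[b] holds the entries found while walking block b in order; Flat
// receives them back to back and the result maps b to its window in Flat.
inline std::vector<std::pair<unsigned, unsigned>>
indexPerBlock(const std::vector<std::vector<int>> &Items,
              std::vector<int> &Flat) {
  std::vector<std::pair<unsigned, unsigned>> Blocks;
  for (const std::vector<int> &B : Items) {
    unsigned First = static_cast<unsigned>(Flat.size());
    Flat.insert(Flat.end(), B.begin(), B.end());
    Blocks.push_back({First, static_cast<unsigned>(Flat.size()) - First});
  }
  return Blocks;
}
} // namespace sketch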
+ for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { + if (!MRI->reg_empty(*Supers)) + LRCalc->createDeadDefs(LR, *Supers); + } + } + + // Now extend LR to reach all uses. + // Ignore uses of reserved registers. We only track defs of those. + for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { + unsigned Reg = *Supers; + if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) + LRCalc->extendToUses(LR, Reg); + } + } + + // Flush the segment set to the segment vector. + if (UseSegmentSetForPhysRegs) + LR.flushSegmentSet(); +} + + +/// computeLiveInRegUnits - Precompute the live ranges of any register units +/// that are live-in to an ABI block somewhere. Register values can appear +/// without a corresponding def when entering the entry block or a landing pad. +/// +void LiveIntervals::computeLiveInRegUnits() { + RegUnitRanges.resize(TRI->getNumRegUnits()); + DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); + + // Keep track of the live range sets allocated. + SmallVector<unsigned, 8> NewRanges; + + // Check all basic blocks for live-ins. + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock *MBB = &*MFI; + + // We only care about ABI blocks: Entry + landing pads. + if ((MFI != MF->begin() && !MBB->isEHPad()) || MBB->livein_empty()) + continue; + + // Create phi-defs at Begin for all live-in registers. + SlotIndex Begin = Indexes->getMBBStartIdx(MBB); + DEBUG(dbgs() << Begin << "\tBB#" << MBB->getNumber()); + for (const auto &LI : MBB->liveins()) { + for (MCRegUnitIterator Units(LI.PhysReg, TRI); Units.isValid(); ++Units) { + unsigned Unit = *Units; + LiveRange *LR = RegUnitRanges[Unit]; + if (!LR) { + // Use segment set to speed-up initial computation of the live range. + LR = RegUnitRanges[Unit] = new LiveRange(UseSegmentSetForPhysRegs); + NewRanges.push_back(Unit); + } + VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); + (void)VNI; + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); + } + } + DEBUG(dbgs() << '\n'); + } + DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); + + // Compute the 'normal' part of the ranges. + for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) { + unsigned Unit = NewRanges[i]; + computeRegUnitRange(*RegUnitRanges[Unit], Unit); + } +} + + +static void createSegmentsForValues(LiveRange &LR, + iterator_range<LiveInterval::vni_iterator> VNIs) { + for (auto VNI : VNIs) { + if (VNI->isUnused()) + continue; + SlotIndex Def = VNI->def; + LR.addSegment(LiveRange::Segment(Def, Def.getDeadSlot(), VNI)); + } +} + +typedef SmallVector<std::pair<SlotIndex, VNInfo*>, 16> ShrinkToUsesWorkList; + +static void extendSegmentsToUses(LiveRange &LR, const SlotIndexes &Indexes, + ShrinkToUsesWorkList &WorkList, + const LiveRange &OldRange) { + // Keep track of the PHIs that are in use. + SmallPtrSet<VNInfo*, 8> UsedPHIs; + // Blocks that have already been added to WorkList as live-out. + SmallPtrSet<MachineBasicBlock*, 16> LiveOut; + + // Extend intervals to reach all uses in WorkList. 
+ while (!WorkList.empty()) { + SlotIndex Idx = WorkList.back().first; + VNInfo *VNI = WorkList.back().second; + WorkList.pop_back(); + const MachineBasicBlock *MBB = Indexes.getMBBFromIndex(Idx.getPrevSlot()); + SlotIndex BlockStart = Indexes.getMBBStartIdx(MBB); + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = LR.extendInBlock(BlockStart, Idx)) { + assert(ExtVNI == VNI && "Unexpected existing value number"); + (void)ExtVNI; + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || + !UsedPHIs.insert(VNI).second) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (auto &Pred : MBB->predecessors()) { + if (!LiveOut.insert(Pred).second) + continue; + SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + // A predecessor is not required to have a live-out value for a PHI. + if (VNInfo *PVNI = OldRange.getVNInfoBefore(Stop)) + WorkList.push_back(std::make_pair(Stop, PVNI)); + } + continue; + } + + // VNI is live-in to MBB. + DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); + LR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); + + // Make sure VNI is live-out from the predecessors. + for (auto &Pred : MBB->predecessors()) { + if (!LiveOut.insert(Pred).second) + continue; + SlotIndex Stop = Indexes.getMBBEndIdx(Pred); + assert(OldRange.getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); + WorkList.push_back(std::make_pair(Stop, VNI)); + } + } +} + +bool LiveIntervals::shrinkToUses(LiveInterval *li, + SmallVectorImpl<MachineInstr*> *dead) { + DEBUG(dbgs() << "Shrink: " << *li << '\n'); + assert(TargetRegisterInfo::isVirtualRegister(li->reg) + && "Can only shrink virtual registers"); + + // Shrink subregister live ranges. + bool NeedsCleanup = false; + for (LiveInterval::SubRange &S : li->subranges()) { + shrinkToUses(S, li->reg); + if (S.empty()) + NeedsCleanup = true; + } + if (NeedsCleanup) + li->removeEmptySubRanges(); + + // Find all the values used, including PHI kills. + ShrinkToUsesWorkList WorkList; + + // Visit all instructions reading li->reg. + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(li->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); + if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) + continue; + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + LiveQueryResult LRQ = li->Query(Idx); + VNInfo *VNI = LRQ.valueIn(); + if (!VNI) { + // This shouldn't happen: readsVirtualRegister returns true, but there is + // no live value. It is likely caused by a target getting <undef> flags + // wrong. + DEBUG(dbgs() << Idx << '\t' << *UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); + continue; + } + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. + if (VNInfo *DefVNI = LRQ.valueDefined()) + Idx = DefVNI->def; + + WorkList.push_back(std::make_pair(Idx, VNI)); + } + + // Create new live ranges with only minimal live segments per def. + LiveRange NewLR; + createSegmentsForValues(NewLR, make_range(li->vni_begin(), li->vni_end())); + extendSegmentsToUses(NewLR, *Indexes, WorkList, *li); + + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); + + // Handle dead values. 
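// extendSegmentsToUses() above drains a worklist of (use point, value) pairs:
// a use either meets its def inside the same block or makes the value live-in
// at the block start, in which case every predecessor becomes live-out and is
// queued in turn. A rough standalone CFG sketch of that propagation (block
// ids, Preds and liveInBlocks are assumptions for illustration).
#include <map>
#include <set>
#include <vector>

namespace sketch {
// DefBlock contains the def. Returns every block where the value is live-in,
// given the blocks that contain uses.
inline std::set<int>
liveInBlocks(const std::map<int, std::vector<int>> &Preds, int DefBlock,
             const std::vector<int> &UseBlocks) {
  std::set<int> LiveIn;
  std::vector<int> WorkList(UseBlocks.begin(), UseBlocks.end());
  while (!WorkList.empty()) {
    int B = WorkList.back();
    WorkList.pop_back();
    if (B == DefBlock)
      continue;                                  // the use meets its def here.
    if (!LiveIn.insert(B).second)
      continue;                                  // already known to be live-in.
    auto PI = Preds.find(B);
    if (PI == Preds.end())
      continue;
    for (int P : PI->second)
      WorkList.push_back(P);                     // value must be live-out of P.
  }
  return LiveIn;
}
} // namespace sketch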
+ bool CanSeparate = computeDeadValues(*li, dead); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; +} + +bool LiveIntervals::computeDeadValues(LiveInterval &LI, + SmallVectorImpl<MachineInstr*> *dead) { + bool MayHaveSplitComponents = false; + for (auto VNI : LI.valnos) { + if (VNI->isUnused()) + continue; + SlotIndex Def = VNI->def; + LiveRange::iterator I = LI.FindSegmentContaining(Def); + assert(I != LI.end() && "Missing segment for VNI"); + + // Is the register live before? Otherwise we may have to add a read-undef + // flag for subregister defs. + bool DeadBeforeDef = false; + unsigned VReg = LI.reg; + if (MRI->shouldTrackSubRegLiveness(VReg)) { + if ((I == LI.begin() || std::prev(I)->end < Def) && !VNI->isPHIDef()) { + MachineInstr *MI = getInstructionFromIndex(Def); + MI->setRegisterDefReadUndef(VReg); + DeadBeforeDef = true; + } + } + + if (I->end != Def.getDeadSlot()) + continue; + if (VNI->isPHIDef()) { + // This is a dead PHI. Remove it. + VNI->markUnused(); + LI.removeSegment(I); + DEBUG(dbgs() << "Dead PHI at " << Def << " may separate interval\n"); + MayHaveSplitComponents = true; + } else { + // This is a dead def. Make sure the instruction knows. + MachineInstr *MI = getInstructionFromIndex(Def); + assert(MI && "No instruction defining live value"); + MI->addRegisterDead(VReg, TRI); + + // If we have a dead def that is completely separate from the rest of + // the liverange then we rewrite it to use a different VReg to not violate + // the rule that the liveness of a virtual register forms a connected + // component. This should only happen if subregister liveness is tracked. + if (DeadBeforeDef) + MayHaveSplitComponents = true; + + if (dead && MI->allDefsAreDead()) { + DEBUG(dbgs() << "All defs dead: " << Def << '\t' << *MI); + dead->push_back(MI); + } + } + } + return MayHaveSplitComponents; +} + +void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) +{ + DEBUG(dbgs() << "Shrink: " << SR << '\n'); + assert(TargetRegisterInfo::isVirtualRegister(Reg) + && "Can only shrink virtual registers"); + // Find all the values used, including PHI kills. + ShrinkToUsesWorkList WorkList; + + // Visit all instructions reading Reg. + SlotIndex LastIdx; + for (MachineOperand &MO : MRI->reg_operands(Reg)) { + MachineInstr *UseMI = MO.getParent(); + if (UseMI->isDebugValue()) + continue; + // Maybe the operand is for a subregister we don't care about. + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg); + if ((LaneMask & SR.LaneMask) == 0) + continue; + } + // We only need to visit each instruction once. + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + if (Idx == LastIdx) + continue; + LastIdx = Idx; + + LiveQueryResult LRQ = SR.Query(Idx); + VNInfo *VNI = LRQ.valueIn(); + // For Subranges it is possible that only undef values are left in that + // part of the subregister, so there is no real liverange at the use + if (!VNI) + continue; + + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. + if (VNInfo *DefVNI = LRQ.valueDefined()) + Idx = DefVNI->def; + + WorkList.push_back(std::make_pair(Idx, VNI)); + } + + // Create a new live ranges with only minimal live segments per def. + LiveRange NewLR; + createSegmentsForValues(NewLR, make_range(SR.vni_begin(), SR.vni_end())); + extendSegmentsToUses(NewLR, *Indexes, WorkList, SR); + + // Move the trimmed ranges back. 
+ SR.segments.swap(NewLR.segments); + + // Remove dead PHI value numbers + for (auto VNI : SR.valnos) { + if (VNI->isUnused()) + continue; + const LiveRange::Segment *Segment = SR.getSegmentContaining(VNI->def); + assert(Segment != nullptr && "Missing segment for VNI"); + if (Segment->end != VNI->def.getDeadSlot()) + continue; + if (VNI->isPHIDef()) { + // This is a dead PHI. Remove it. + VNI->markUnused(); + SR.removeSegment(*Segment); + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + } + } + + DEBUG(dbgs() << "Shrunk: " << SR << '\n'); +} + +void LiveIntervals::extendToIndices(LiveRange &LR, + ArrayRef<SlotIndex> Indices) { + assert(LRCalc && "LRCalc not initialized."); + LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); + for (unsigned i = 0, e = Indices.size(); i != e; ++i) + LRCalc->extend(LR, Indices[i]); +} + +void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill, + SmallVectorImpl<SlotIndex> *EndPoints) { + LiveQueryResult LRQ = LR.Query(Kill); + VNInfo *VNI = LRQ.valueOutOrDead(); + if (!VNI) + return; + + MachineBasicBlock *KillMBB = Indexes->getMBBFromIndex(Kill); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(KillMBB); + + // If VNI isn't live out from KillMBB, the value is trivially pruned. + if (LRQ.endPoint() < MBBEnd) { + LR.removeSegment(Kill, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + return; + } + + // VNI is live out of KillMBB. + LR.removeSegment(Kill, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + + // Find all blocks that are reachable from KillMBB without leaving VNI's live + // range. It is possible that KillMBB itself is reachable, so start a DFS + // from each successor. + typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy; + VisitedTy Visited; + for (MachineBasicBlock::succ_iterator + SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end(); + SuccI != SuccE; ++SuccI) { + for (df_ext_iterator<MachineBasicBlock*, VisitedTy> + I = df_ext_begin(*SuccI, Visited), E = df_ext_end(*SuccI, Visited); + I != E;) { + MachineBasicBlock *MBB = *I; + + // Check if VNI is live in to MBB. + SlotIndex MBBStart, MBBEnd; + std::tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); + LiveQueryResult LRQ = LR.Query(MBBStart); + if (LRQ.valueIn() != VNI) { + // This block isn't part of the VNI segment. Prune the search. + I.skipChildren(); + continue; + } + + // Prune the search if VNI is killed in MBB. + if (LRQ.endPoint() < MBBEnd) { + LR.removeSegment(MBBStart, LRQ.endPoint()); + if (EndPoints) EndPoints->push_back(LRQ.endPoint()); + I.skipChildren(); + continue; + } + + // VNI is live through MBB. + LR.removeSegment(MBBStart, MBBEnd); + if (EndPoints) EndPoints->push_back(MBBEnd); + ++I; + } + } +} + +//===----------------------------------------------------------------------===// +// Register allocator hooks. +// + +void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { + // Keep track of regunit ranges. + SmallVector<std::pair<const LiveRange*, LiveRange::const_iterator>, 8> RU; + // Keep track of subregister ranges. + SmallVector<std::pair<const LiveInterval::SubRange*, + LiveRange::const_iterator>, 4> SRs; + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + const LiveInterval &LI = getInterval(Reg); + if (LI.empty()) + continue; + + // Find the regunit intervals for the assigned register. They may overlap + // the virtual register live range, cancelling any kills. 
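// pruneValue() above clips the value at the kill point and then walks the CFG
// forward from the kill block's successors, clearing whole blocks while the
// value remains live and cutting the walk short where it is not live-in.
// A simplified standalone sketch of that pruning walk; LiveThrough stands in
// for "value live across this whole block" and the names are assumptions.
#include <map>
#include <set>
#include <vector>

namespace sketch {
inline void pruneFrom(const std::map<int, std::vector<int>> &Succs,
                      int KillBlock, std::set<int> &LiveThrough) {
  std::set<int> Visited;
  std::vector<int> WorkList;
  auto KI = Succs.find(KillBlock);
  if (KI != Succs.end())
    WorkList.assign(KI->second.begin(), KI->second.end());
  while (!WorkList.empty()) {
    int B = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(B).second)
      continue;                                  // each block handled once.
    if (!LiveThrough.erase(B))
      continue;                                  // not live here: prune the walk.
    auto SI = Succs.find(B);
    if (SI == Succs.end())
      continue;
    for (int S : SI->second)
      WorkList.push_back(S);                     // still inside the live region.
  }
}
} // namespace sketch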
+ RU.clear(); + for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); + ++Units) { + const LiveRange &RURange = getRegUnit(*Units); + if (RURange.empty()) + continue; + RU.push_back(std::make_pair(&RURange, RURange.find(LI.begin()->end))); + } + + if (MRI->subRegLivenessEnabled()) { + SRs.clear(); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SRs.push_back(std::make_pair(&SR, SR.find(LI.begin()->end))); + } + } + + // Every instruction that kills Reg corresponds to a segment range end + // point. + for (LiveInterval::const_iterator RI = LI.begin(), RE = LI.end(); RI != RE; + ++RI) { + // A block index indicates an MBB edge. + if (RI->end.isBlock()) + continue; + MachineInstr *MI = getInstructionFromIndex(RI->end); + if (!MI) + continue; + + // Check if any of the regunits are live beyond the end of RI. That could + // happen when a physreg is defined as a copy of a virtreg: + // + // %EAX = COPY %vreg5 + // FOO %vreg5 <--- MI, cancel kill because %EAX is live. + // BAR %EAX<kill> + // + // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. + for (auto &RUP : RU) { + const LiveRange &RURange = *RUP.first; + LiveRange::const_iterator &I = RUP.second; + if (I == RURange.end()) + continue; + I = RURange.advanceTo(I, RI->end); + if (I == RURange.end() || I->start >= RI->end) + continue; + // I is overlapping RI. + goto CancelKill; + } + + if (MRI->subRegLivenessEnabled()) { + // When reading a partial undefined value we must not add a kill flag. + // The regalloc might have used the undef lane for something else. + // Example: + // %vreg1 = ... ; R32: %vreg1 + // %vreg2:high16 = ... ; R64: %vreg2 + // = read %vreg2<kill> ; R64: %vreg2 + // = read %vreg1 ; R32: %vreg1 + // The <kill> flag is correct for %vreg2, but the register allocator may + // assign R0L to %vreg1, and R0 to %vreg2 because the low 32bits of R0 + // are actually never written by %vreg2. After assignment the <kill> + // flag at the read instruction is invalid. + LaneBitmask DefinedLanesMask; + if (!SRs.empty()) { + // Compute a mask of lanes that are defined. + DefinedLanesMask = 0; + for (auto &SRP : SRs) { + const LiveInterval::SubRange &SR = *SRP.first; + LiveRange::const_iterator &I = SRP.second; + if (I == SR.end()) + continue; + I = SR.advanceTo(I, RI->end); + if (I == SR.end() || I->start >= RI->end) + continue; + // I is overlapping RI + DefinedLanesMask |= SR.LaneMask; + } + } else + DefinedLanesMask = ~0u; + + bool IsFullWrite = false; + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.getReg() != Reg) + continue; + if (MO.isUse()) { + // Reading any undefined lanes? + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + if ((UseMask & ~DefinedLanesMask) != 0) + goto CancelKill; + } else if (MO.getSubReg() == 0) { + // Writing to the full register? + assert(MO.isDef()); + IsFullWrite = true; + } + } + + // If an instruction writes to a subregister, a new segment starts in + // the LiveInterval. But as this is only overriding part of the register + // adding kill-flags is not correct here after registers have been + // assigned. + if (!IsFullWrite) { + // Next segment has to be adjacent in the subregister write case. 
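// The lane checks above only keep a kill flag when every lane the operand
// reads is covered by some subrange, i.e. the used lanes are a subset of the
// defined lanes. A tiny standalone sketch of that mask test, with uint32_t
// standing in for llvm::LaneBitmask.
#include <cstdint>

namespace sketch {
using LaneMask = std::uint32_t;

// True if the use reads no lane that is left undefined.
inline bool killFlagSafe(LaneMask UseLanes, LaneMask DefinedLanes) {
  return (UseLanes & ~DefinedLanes) == 0;
}
// Example: UseLanes = 0b11 (both halves read) but DefinedLanes = 0b10 (only
// the high half ever written) leaves 0b01 uncovered, so no kill flag is kept.
} // namespace sketch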
+ LiveRange::const_iterator N = std::next(RI); + if (N != LI.end() && N->start == RI->end) + goto CancelKill; + } + } + + MI->addRegisterKilled(Reg, nullptr); + continue; +CancelKill: + MI->clearRegisterKills(Reg, nullptr); + } + } +} + +MachineBasicBlock* +LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const { + // A local live range must be fully contained inside the block, meaning it is + // defined and killed at instructions, not at block boundaries. It is not + // live in or or out of any block. + // + // It is technically possible to have a PHI-defined live range identical to a + // single block, but we are going to return false in that case. + + SlotIndex Start = LI.beginIndex(); + if (Start.isBlock()) + return nullptr; + + SlotIndex Stop = LI.endIndex(); + if (Stop.isBlock()) + return nullptr; + + // getMBBFromIndex doesn't need to search the MBB table when both indexes + // belong to proper instructions. + MachineBasicBlock *MBB1 = Indexes->getMBBFromIndex(Start); + MachineBasicBlock *MBB2 = Indexes->getMBBFromIndex(Stop); + return MBB1 == MBB2 ? MBB1 : nullptr; +} + +bool +LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { + for (const VNInfo *PHI : LI.valnos) { + if (PHI->isUnused() || !PHI->isPHIDef()) + continue; + const MachineBasicBlock *PHIMBB = getMBBFromIndex(PHI->def); + // Conservatively return true instead of scanning huge predecessor lists. + if (PHIMBB->pred_size() > 100) + return true; + for (MachineBasicBlock::const_pred_iterator + PI = PHIMBB->pred_begin(), PE = PHIMBB->pred_end(); PI != PE; ++PI) + if (VNI == LI.getVNInfoBefore(Indexes->getMBBEndIdx(*PI))) + return true; + } + return false; +} + +float +LiveIntervals::getSpillWeight(bool isDef, bool isUse, + const MachineBlockFrequencyInfo *MBFI, + const MachineInstr *MI) { + BlockFrequency Freq = MBFI->getBlockFreq(MI->getParent()); + const float Scale = 1.0f / MBFI->getEntryFreq(); + return (isDef + isUse) * (Freq.getFrequency() * Scale); +} + +LiveRange::Segment +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { + LiveInterval& Interval = createEmptyInterval(reg); + VNInfo* VN = Interval.getNextValue( + SlotIndex(getInstructionIndex(startInst).getRegSlot()), + getVNInfoAllocator()); + LiveRange::Segment S( + SlotIndex(getInstructionIndex(startInst).getRegSlot()), + getMBBEndIdx(startInst->getParent()), VN); + Interval.addSegment(S); + + return S; +} + + +//===----------------------------------------------------------------------===// +// Register mask functions +//===----------------------------------------------------------------------===// + +bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI, + BitVector &UsableRegs) { + if (LI.empty()) + return false; + LiveInterval::iterator LiveI = LI.begin(), LiveE = LI.end(); + + // Use a smaller arrays for local live ranges. + ArrayRef<SlotIndex> Slots; + ArrayRef<const uint32_t*> Bits; + if (MachineBasicBlock *MBB = intervalIsInOneMBB(LI)) { + Slots = getRegMaskSlotsInBlock(MBB->getNumber()); + Bits = getRegMaskBitsInBlock(MBB->getNumber()); + } else { + Slots = getRegMaskSlots(); + Bits = getRegMaskBits(); + } + + // We are going to enumerate all the register mask slots contained in LI. + // Start with a binary search of RegMaskSlots to find a starting point. + ArrayRef<SlotIndex>::iterator SlotI = + std::lower_bound(Slots.begin(), Slots.end(), LiveI->start); + ArrayRef<SlotIndex>::iterator SlotE = Slots.end(); + + // No slots in range, LI begins after the last call. 
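// getSpillWeight() above scales each def/use by the relative frequency of its
// block: (isDef + isUse) * blockFreq / entryFreq. A standalone sketch of that
// arithmetic with plain integers standing in for BlockFrequency.
#include <cstdint>

namespace sketch {
inline float spillWeight(bool IsDef, bool IsUse, std::uint64_t BlockFreq,
                         std::uint64_t EntryFreq) {
  const float Scale = 1.0f / EntryFreq;
  return (IsDef + IsUse) * (BlockFreq * Scale);
}
// Example: a use in a loop body executed 8x as often as the entry block,
// spillWeight(false, true, 80, 10), weighs roughly 8.0; the same use outside
// the loop would weigh about 1.0.
} // namespace sketch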
+ if (SlotI == SlotE) + return false; + + bool Found = false; + for (;;) { + assert(*SlotI >= LiveI->start); + // Loop over all slots overlapping this segment. + while (*SlotI < LiveI->end) { + // *SlotI overlaps LI. Collect mask bits. + if (!Found) { + // This is the first overlap. Initialize UsableRegs to all ones. + UsableRegs.clear(); + UsableRegs.resize(TRI->getNumRegs(), true); + Found = true; + } + // Remove usable registers clobbered by this mask. + UsableRegs.clearBitsNotInMask(Bits[SlotI-Slots.begin()]); + if (++SlotI == SlotE) + return Found; + } + // *SlotI is beyond the current LI segment. + LiveI = LI.advanceTo(LiveI, *SlotI); + if (LiveI == LiveE) + return Found; + // Advance SlotI until it overlaps. + while (*SlotI < LiveI->start) + if (++SlotI == SlotE) + return Found; + } +} + +//===----------------------------------------------------------------------===// +// IntervalUpdate class. +//===----------------------------------------------------------------------===// + +// HMEditor is a toolkit used by handleMove to trim or extend live intervals. +class LiveIntervals::HMEditor { +private: + LiveIntervals& LIS; + const MachineRegisterInfo& MRI; + const TargetRegisterInfo& TRI; + SlotIndex OldIdx; + SlotIndex NewIdx; + SmallPtrSet<LiveRange*, 8> Updated; + bool UpdateFlags; + +public: + HMEditor(LiveIntervals& LIS, const MachineRegisterInfo& MRI, + const TargetRegisterInfo& TRI, + SlotIndex OldIdx, SlotIndex NewIdx, bool UpdateFlags) + : LIS(LIS), MRI(MRI), TRI(TRI), OldIdx(OldIdx), NewIdx(NewIdx), + UpdateFlags(UpdateFlags) {} + + // FIXME: UpdateFlags is a workaround that creates live intervals for all + // physregs, even those that aren't needed for regalloc, in order to update + // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill + // flags, and postRA passes will use a live register utility instead. + LiveRange *getRegUnitLI(unsigned Unit) { + if (UpdateFlags) + return &LIS.getRegUnit(Unit); + return LIS.getCachedRegUnit(Unit); + } + + /// Update all live ranges touched by MI, assuming a move from OldIdx to + /// NewIdx. + void updateAllRanges(MachineInstr *MI) { + DEBUG(dbgs() << "handleMove " << OldIdx << " -> " << NewIdx << ": " << *MI); + bool hasRegMask = false; + for (MachineOperand &MO : MI->operands()) { + if (MO.isRegMask()) + hasRegMask = true; + if (!MO.isReg()) + continue; + // Aggressively clear all kill flags. + // They are reinserted by VirtRegRewriter. + if (MO.isUse()) + MO.setIsKill(false); + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + LiveInterval &LI = LIS.getInterval(Reg); + if (LI.hasSubRanges()) { + unsigned SubReg = MO.getSubReg(); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LaneMask) == 0) + continue; + updateRange(S, Reg, S.LaneMask); + } + } + updateRange(LI, Reg, 0); + continue; + } + + // For physregs, only update the regunits that actually have a + // precomputed live range. + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) + if (LiveRange *LR = getRegUnitLI(*Units)) + updateRange(*LR, *Units, 0); + } + if (hasRegMask) + updateRegMaskSlots(); + } + +private: + /// Update a single live range, assuming an instruction has been moved from + /// OldIdx to NewIdx. 
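// checkRegMaskInterference() above finds its starting point with a single
// lower_bound and then walks the sorted regmask slots and the sorted live
// segments in tandem. A standalone sketch of that two-pointer walk collecting
// every slot that falls inside some segment (plain ints; names are
// illustrative).
#include <algorithm>
#include <utility>
#include <vector>

namespace sketch {
using Interval = std::pair<int, int>;            // [first, second)

inline std::vector<int> slotsInside(const std::vector<Interval> &Segs,
                                    const std::vector<int> &Slots) {
  std::vector<int> Hits;
  if (Segs.empty())
    return Hits;
  auto Slot = std::lower_bound(Slots.begin(), Slots.end(), Segs.front().first);
  auto Seg = Segs.begin();
  while (Slot != Slots.end() && Seg != Segs.end()) {
    if (*Slot < Seg->first)
      ++Slot;                                    // slot before the segment.
    else if (*Slot < Seg->second)
      Hits.push_back(*Slot++);                   // slot inside the segment.
    else
      ++Seg;                                     // slot beyond: next segment.
  }
  return Hits;
}
} // namespace sketch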
+ void updateRange(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { + if (!Updated.insert(&LR).second) + return; + DEBUG({ + dbgs() << " "; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + dbgs() << PrintReg(Reg); + if (LaneMask != 0) + dbgs() << " L" << PrintLaneMask(LaneMask); + } else { + dbgs() << PrintRegUnit(Reg, &TRI); + } + dbgs() << ":\t" << LR << '\n'; + }); + if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) + handleMoveDown(LR); + else + handleMoveUp(LR, Reg, LaneMask); + DEBUG(dbgs() << " -->\t" << LR << '\n'); + LR.verify(); + } + + /// Update LR to reflect an instruction has been moved downwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Move def to NewIdx, assert endpoint after NewIdx. + /// + /// 2. Live def at OldIdx, killed at NewIdx: + /// Change to dead def at NewIdx. + /// (Happens when bundling def+kill together). + /// + /// 3. Dead def at OldIdx: + /// Move def to NewIdx, possibly across another live value. + /// + /// 4. Def at OldIdx AND at NewIdx: + /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value read at OldIdx, killed before NewIdx: + /// Extend kill to NewIdx. + /// + void handleMoveDown(LiveRange &LR) { + // First look for a kill at OldIdx. + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; + + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + bool isKill = SlotIndex::isSameInstr(OldIdx, I->end); + // If the live-in value already extends to NewIdx, there is nothing to do. + if (!SlotIndex::isEarlierInstr(I->end, NewIdx)) + return; + // Aggressively remove all kill flags from the old kill point. + // Kill flags shouldn't be used while live intervals exist, they will be + // reinserted by VirtRegRewriter. + if (MachineInstr *KillMI = LIS.getInstructionFromIndex(I->end)) + for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) + if (MO->isReg() && MO->isUse()) + MO->setIsKill(false); + // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by + // overlapping ranges. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + // If this was a kill, there may also be a def. Otherwise we're done. + if (!isKill) + return; + ++I; + } + + // Check for a def at OldIdx. + if (I == E || !SlotIndex::isSameInstr(OldIdx, I->start)) + return; + // We have a def at OldIdx. + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + // If the defined value extends beyond NewIdx, just move the def down. + // This is case 1 above. + if (SlotIndex::isEarlierInstr(NewIdx, I->end)) { + I->start = DefVNI->def; + return; + } + // The remaining possibilities are now: + // 2. Live def at OldIdx, killed at NewIdx: isSameInstr(I->end, NewIdx). + // 3. Dead def at OldIdx: I->end = OldIdx.getDeadSlot(). + // In either case, it is possible that there is an existing def at NewIdx. + assert((I->end == OldIdx.getDeadSlot() || + SlotIndex::isSameInstr(I->end, NewIdx)) && + "Cannot move def below kill"); + LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); + if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { + // There is an existing def at NewIdx, case 4 above. The def at OldIdx is + // coalesced into that value. 
+ assert(NewI->valno != DefVNI && "Multiple defs of value?"); + LR.removeValNo(DefVNI); + return; + } + // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. + // If the def at OldIdx was dead, we allow it to be moved across other LR + // values. The new range should be placed immediately before NewI, move any + // intermediate ranges up. + assert(NewI != I && "Inconsistent iterators"); + std::copy(std::next(I), NewI, I); + *std::prev(NewI) + = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + } + + /// Update LR to reflect an instruction has been moved upwards from OldIdx + /// to NewIdx. + /// + /// 1. Live def at OldIdx: + /// Hoist def to NewIdx. + /// + /// 2. Dead def at OldIdx: + /// Hoist def+end to NewIdx, possibly move across other values. + /// + /// 3. Dead def at OldIdx AND existing def at NewIdx: + /// Remove value defined at OldIdx, coalescing it with existing value. + /// + /// 4. Live def at OldIdx AND existing def at NewIdx: + /// Remove value defined at NewIdx, hoist OldIdx def to NewIdx. + /// (Happens when bundling multiple defs together). + /// + /// 5. Value killed at OldIdx: + /// Hoist kill to NewIdx, then scan for last kill between NewIdx and + /// OldIdx. + /// + void handleMoveUp(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask) { + // First look for a kill at OldIdx. + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? + if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) + return; + + // Handle a live-in value. + if (!SlotIndex::isSameInstr(I->start, OldIdx)) { + // If the live-in value isn't killed here, there is nothing to do. + if (!SlotIndex::isSameInstr(OldIdx, I->end)) + return; + // Adjust I->end to end at NewIdx. If we are hoisting a kill above + // another use, we need to search for that use. Case 5 above. + I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); + ++I; + // If OldIdx also defines a value, there couldn't have been another use. + if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { + // No def, search for the new kill. + // This can never be an early clobber kill since there is no def. + std::prev(I)->end = findLastUseBefore(Reg, LaneMask).getRegSlot(); + return; + } + } + + // Now deal with the def at OldIdx. + assert(I != E && SlotIndex::isSameInstr(I->start, OldIdx) && "No def?"); + VNInfo *DefVNI = I->valno; + assert(DefVNI->def == I->start && "Inconsistent def"); + DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); + + // Check for an existing def at NewIdx. + LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); + if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { + assert(NewI->valno != DefVNI && "Same value defined more than once?"); + // There is an existing def at NewIdx. + if (I->end.isDead()) { + // Case 3: Remove the dead def at OldIdx. + LR.removeValNo(DefVNI); + return; + } + // Case 4: Replace def at NewIdx with live def at OldIdx. + I->start = DefVNI->def; + LR.removeValNo(NewI->valno); + return; + } + + // There is no existing def at NewIdx. Hoist DefVNI. + if (!I->end.isDead()) { + // Leave the end point of a live def. + I->start = DefVNI->def; + return; + } + + // DefVNI is a dead def. It may have been moved across other values in LR, + // so move I up to NewI. Slide [NewI;I) down one position. 
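// handleMoveDown()/handleMoveUp() slide a dead def across the segments it was
// moved over with std::copy / std::copy_backward and then rewrite the vacated
// slot, keeping the vector sorted. A standalone sketch of that sliding step
// for a plain vector (moveElement is an assumed name).
#include <algorithm>
#include <cstddef>
#include <vector>

namespace sketch {
// Move the element at From to position To, shifting everything in between by
// one slot; works in either direction.
inline void moveElement(std::vector<int> &V, std::size_t From, std::size_t To) {
  int Tmp = V[From];
  if (From < To)
    std::copy(V.begin() + From + 1, V.begin() + To + 1, V.begin() + From);
  else
    std::copy_backward(V.begin() + To, V.begin() + From, V.begin() + From + 1);
  V[To] = Tmp;
}
// Example: V = {1,2,3,4,5}; moveElement(V, 4, 1) yields {1,5,2,3,4}.
} // namespace sketch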
+ std::copy_backward(NewI, I, std::next(I)); + *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + } + + void updateRegMaskSlots() { + SmallVectorImpl<SlotIndex>::iterator RI = + std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(), + OldIdx); + assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() && + "No RegMask at OldIdx."); + *RI = NewIdx.getRegSlot(); + assert((RI == LIS.RegMaskSlots.begin() || + SlotIndex::isEarlierInstr(*std::prev(RI), *RI)) && + "Cannot move regmask instruction above another call"); + assert((std::next(RI) == LIS.RegMaskSlots.end() || + SlotIndex::isEarlierInstr(*RI, *std::next(RI))) && + "Cannot move regmask instruction below another call"); + } + + // Return the last use of reg between NewIdx and OldIdx. + SlotIndex findLastUseBefore(unsigned Reg, LaneBitmask LaneMask) { + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + SlotIndex LastUse = NewIdx; + for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) { + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0 && LaneMask != 0 + && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0) + continue; + + const MachineInstr *MI = MO.getParent(); + SlotIndex InstSlot = LIS.getSlotIndexes()->getInstructionIndex(MI); + if (InstSlot > LastUse && InstSlot < OldIdx) + LastUse = InstSlot; + } + return LastUse; + } + + // This is a regunit interval, so scanning the use list could be very + // expensive. Scan upwards from OldIdx instead. + assert(NewIdx < OldIdx && "Expected upwards move"); + SlotIndexes *Indexes = LIS.getSlotIndexes(); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + + // OldIdx may not correspond to an instruction any longer, so set MII to + // point to the next instruction after OldIdx, or MBB->end(). + MachineBasicBlock::iterator MII = MBB->end(); + if (MachineInstr *MI = Indexes->getInstructionFromIndex( + Indexes->getNextNonNullIndex(OldIdx))) + if (MI->getParent() == MBB) + MII = MI; + + MachineBasicBlock::iterator Begin = MBB->begin(); + while (MII != Begin) { + if ((--MII)->isDebugValue()) + continue; + SlotIndex Idx = Indexes->getInstructionIndex(MII); + + // Stop searching when NewIdx is reached. + if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) + return NewIdx; + + // Check if MII uses Reg. + for (MIBundleOperands MO(MII); MO.isValid(); ++MO) + if (MO->isReg() && + TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + TRI.hasRegUnit(MO->getReg(), Reg)) + return Idx; + } + // Didn't reach NewIdx. It must be the first instruction in the block. 
+ return NewIdx; + } +}; + +void LiveIntervals::handleMove(MachineInstr* MI, bool UpdateFlags) { + assert(!MI->isBundled() && "Can't handle bundled instructions yet."); + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + Indexes->removeMachineInstrFromMaps(MI); + SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI); + assert(getMBBStartIdx(MI->getParent()) <= OldIndex && + OldIndex < getMBBEndIdx(MI->getParent()) && + "Cannot handle moves across basic block boundaries."); + + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); +} + +void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, + MachineInstr* BundleStart, + bool UpdateFlags) { + SlotIndex OldIndex = Indexes->getInstructionIndex(MI); + SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart); + HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); + HME.updateAllRanges(MI); +} + +void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin, + const MachineBasicBlock::iterator End, + const SlotIndex endIdx, + LiveRange &LR, const unsigned Reg, + LaneBitmask LaneMask) { + LiveInterval::iterator LII = LR.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LR.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + unsigned SubReg = MO.getSubReg(); + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg); + if ((Mask & LaneMask) == 0) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LR.begin()) + prevStart = std::prev(LII)->start; + + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LR.removeSegment(*LII, true); + if (prevStart.isValid()) + LII = LR.find(prevStart); + else + LII = LR.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LR.addSegment(S); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LR.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LR.addSegment(S); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. 
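handleMove() above explicitly refuses to cross basic block boundaries. A cross-block move has to be modelled as a remove/re-insert on the slot index maps instead; a rough sketch under that assumption (the live ranges of the affected registers still need to be repaired afterwards, e.g. with repairIntervalsInRange() below):

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;

// Sketch only: drop MI's old SlotIndex, splice it into DestMBB, and give it a
// fresh index. Unlike handleMove(), this does not update any live ranges.
static void moveAcrossBlocks(LiveIntervals &LIS, MachineInstr *MI,
                             MachineBasicBlock *DestMBB,
                             MachineBasicBlock::iterator InsertPt) {
  LIS.RemoveMachineInstrFromMaps(MI);           // forget the old SlotIndex
  DestMBB->splice(InsertPt, MI->getParent(), MI);
  LIS.InsertMachineInstrInMaps(MI);             // assign a new SlotIndex
}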
+ if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } +} + +void +LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + ArrayRef<unsigned> OrigRegs) { + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. + while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !Indexes->hasIndex(End)) + ++End; + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB).getPrevSlot(); + else + endIdx = getInstructionIndex(End); + + Indexes->repairIndexesInRange(MBB, Begin, End); + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && + TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + !hasInterval(MOI->getReg())) { + createAndComputeVirtRegInterval(MOI->getReg()); + } + } + } + + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { + unsigned Reg = OrigRegs[i]; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + LiveInterval &LI = getInterval(Reg); + // FIXME: Should we support undefs that gain defs? + if (!LI.hasAtLeastOneValue()) + continue; + + for (LiveInterval::SubRange &S : LI.subranges()) { + repairOldRegInRange(Begin, End, endIdx, S, Reg, S.LaneMask); + } + repairOldRegInRange(Begin, End, endIdx, LI, Reg); + } +} + +void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = getCachedRegUnit(*Units)) + if (VNInfo *VNI = LR->getVNInfoAt(Pos)) + LR->removeValNo(VNI); + } +} + +void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) { + VNInfo *VNI = LI.getVNInfoAt(Pos); + if (VNI == nullptr) + return; + LI.removeValNo(VNI); + + // Also remove the value in subranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + if (VNInfo *SVNI = S.getVNInfoAt(Pos)) + S.removeValNo(SVNI); + } + LI.removeEmptySubRanges(); +} + +void LiveIntervals::splitSeparateComponents(LiveInterval &LI, + SmallVectorImpl<LiveInterval*> &SplitLIs) { + ConnectedVNInfoEqClasses ConEQ(*this); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp <= 1) + return; + DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n'); + unsigned Reg = LI.reg; + const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); + for (unsigned I = 1; I < NumComp; ++I) { + unsigned NewVReg = MRI->createVirtualRegister(RegClass); + LiveInterval &NewLI = createEmptyInterval(NewVReg); + SplitLIs.push_back(&NewLI); + } + ConEQ.Distribute(LI, SplitLIs.data(), *MRI); +} diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp new file mode 100644 index 0000000..025d99c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp @@ -0,0 +1,205 @@ +//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
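A sketch of the intended calling pattern for repairIntervalsInRange() defined above: record which virtual registers an instruction uses, rewrite the region, then hand it back for repair. The helper and the placement of the rewrite are assumptions of the example, and the sketch assumes the original instruction itself survives the rewrite.

#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

static void repairAfterRewrite(LiveIntervals &LIS, MachineInstr *MI) {
  // Remember the virtual registers whose intervals may need fixing.
  SmallVector<unsigned, 8> OrigRegs;
  for (const MachineOperand &MO : MI->operands())
    if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
      OrigRegs.push_back(MO.getReg());

  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Begin = MI, End = std::next(Begin);

  // ... transform the instructions in [Begin, End) here ...

  // Recompute indexes for the region and patch the listed intervals.
  LIS.repairIntervalsInRange(MBB, Begin, End, OrigRegs);
}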
+// +//===----------------------------------------------------------------------===// +// +// LiveIntervalUnion represents a coalesced set of live intervals. This may be +// used during coalescing to represent a congruence class, or during register +// allocation to model liveness of a physical register. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <algorithm> + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + + +// Merge a LiveInterval's segments. Guarantee no overlaps. +void LiveIntervalUnion::unify(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) + return; + ++Tag; + + // Insert each of the virtual register's live segments into the map. + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + while (SegPos.valid()) { + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); + if (++RegPos == RegEnd) + return; + SegPos.advanceTo(RegPos->start); + } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); +} + +// Remove a live virtual register's segments from this union. +void LiveIntervalUnion::extract(LiveInterval &VirtReg, const LiveRange &Range) { + if (Range.empty()) + return; + ++Tag; + + // Remove each of the virtual register's live segments from the map. + LiveRange::const_iterator RegPos = Range.begin(); + LiveRange::const_iterator RegEnd = Range.end(); + SegmentIter SegPos = Segments.find(RegPos->start); + + for (;;) { + assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval"); + SegPos.erase(); + if (!SegPos.valid()) + return; + + // Skip all segments that may have been coalesced. + RegPos = Range.advanceTo(RegPos, SegPos.start()); + if (RegPos == RegEnd) + return; + + SegPos.advanceTo(RegPos->start); + } +} + +void +LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { + if (empty()) { + OS << " empty\n"; + return; + } + for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) { + OS << " [" << SI.start() << ' ' << SI.stop() << "):" + << PrintReg(SI.value()->reg, TRI); + } + OS << '\n'; +} + +#ifndef NDEBUG +// Verify the live intervals in this union and add them to the visited set. +void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) { + for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI) + VisitedVRegs.set(SI.value()->reg); +} +#endif //!NDEBUG + +// Scan the vector of interfering virtual registers in this union. Assume it's +// quite small. +bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { + SmallVectorImpl<LiveInterval*>::const_iterator I = + std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg); + return I != InterferingVRegs.end(); +} + +// Collect virtual registers in this union that interfere with this +// query's live virtual register. +// +// The query state is one of: +// +// 1. CheckedFirstInterference == false: Iterators are uninitialized. +// 2. 
SeenAllInterferences == true: InterferingVRegs complete, iterators unused. +// 3. Iterators left at the last seen intersection. +// +unsigned LiveIntervalUnion::Query:: +collectInterferingVRegs(unsigned MaxInterferingRegs) { + // Fast path return if we already have the desired information. + if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + + // Set up iterators on the first call. + if (!CheckedFirstInterference) { + CheckedFirstInterference = true; + + // Quickly skip interference check for empty sets. + if (VirtReg->empty() || LiveUnion->empty()) { + SeenAllInterferences = true; + return 0; + } + + // In most cases, the union will start before VirtReg. + VirtRegI = VirtReg->begin(); + LiveUnionI.setMap(LiveUnion->getMap()); + LiveUnionI.find(VirtRegI->start); + } + + LiveInterval::iterator VirtRegEnd = VirtReg->end(); + LiveInterval *RecentReg = nullptr; + while (LiveUnionI.valid()) { + assert(VirtRegI != VirtRegEnd && "Reached end of VirtReg"); + + // Check for overlapping interference. + while (VirtRegI->start < LiveUnionI.stop() && + VirtRegI->end > LiveUnionI.start()) { + // This is an overlap, record the interfering register. + LiveInterval *VReg = LiveUnionI.value(); + if (VReg != RecentReg && !isSeenInterference(VReg)) { + RecentReg = VReg; + InterferingVRegs.push_back(VReg); + if (InterferingVRegs.size() >= MaxInterferingRegs) + return InterferingVRegs.size(); + } + // This LiveUnion segment is no longer interesting. + if (!(++LiveUnionI).valid()) { + SeenAllInterferences = true; + return InterferingVRegs.size(); + } + } + + // The iterators are now not overlapping, LiveUnionI has been advanced + // beyond VirtRegI. + assert(VirtRegI->end <= LiveUnionI.start() && "Expected non-overlap"); + + // Advance the iterator that ends first. + VirtRegI = VirtReg->advanceTo(VirtRegI, LiveUnionI.start()); + if (VirtRegI == VirtRegEnd) + break; + + // Detect overlap, handle above. + if (VirtRegI->start < LiveUnionI.stop()) + continue; + + // Still not overlapping. Catch up LiveUnionI. + LiveUnionI.advanceTo(VirtRegI->start); + } + SeenAllInterferences = true; + return InterferingVRegs.size(); +} + +void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, + unsigned NSize) { + // Reuse existing allocation. + if (NSize == Size) + return; + clear(); + Size = NSize; + LIUs = static_cast<LiveIntervalUnion*>( + malloc(sizeof(LiveIntervalUnion)*NSize)); + for (unsigned i = 0; i != Size; ++i) + new(LIUs + i) LiveIntervalUnion(Alloc); +} + +void LiveIntervalUnion::Array::clear() { + if (!LIUs) + return; + for (unsigned i = 0; i != Size; ++i) + LIUs[i].~LiveIntervalUnion(); + free(LIUs); + Size = 0; + LIUs = nullptr; +} diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp new file mode 100644 index 0000000..efbbcbe --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp @@ -0,0 +1,174 @@ +//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LivePhysRegs utility for tracking liveness of +// physical registers across machine instructions in forward or backward order. +// A more detailed description can be found in the corresponding header file. 
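A sketch of how a register allocator consumes the Query defined above. checkInterference() and interferingVRegs() come from LiveIntervalUnion.h rather than this file, so treat the exact calls as assumptions of the example:

#include "llvm/CodeGen/LiveIntervalUnion.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;

// Report whether VirtReg overlaps anything already placed in Union, and list
// the conflicting virtual registers if it does.
static bool reportInterference(LiveIntervalUnion &Union, LiveInterval &VirtReg) {
  LiveIntervalUnion::Query Q(&VirtReg, &Union);
  if (!Q.checkInterference())
    return false;                      // VirtReg fits without eviction
  Q.collectInterferingVRegs();         // fill the interfering-vreg list
  for (LiveInterval *Other : Q.interferingVRegs())
    dbgs() << "  conflicts with " << PrintReg(Other->reg) << '\n';
  return true;
}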
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + + +/// \brief Remove all registers from the set that get clobbered by the register +/// mask. +/// The clobbers set will be the list of live registers clobbered +/// by the regmask. +void LivePhysRegs::removeRegsInMask(const MachineOperand &MO, + SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> *Clobbers) { + SparseSet<unsigned>::iterator LRI = LiveRegs.begin(); + while (LRI != LiveRegs.end()) { + if (MO.clobbersPhysReg(*LRI)) { + if (Clobbers) + Clobbers->push_back(std::make_pair(*LRI, &MO)); + LRI = LiveRegs.erase(LRI); + } else + ++LRI; + } +} + +/// Simulates liveness when stepping backwards over an instruction(bundle): +/// Remove Defs, add uses. This is the recommended way of calculating liveness. +void LivePhysRegs::stepBackward(const MachineInstr &MI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg); + } else if (O->isRegMask()) + removeRegsInMask(*O, nullptr); + } + + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg); + } +} + +/// Simulates liveness when stepping forward over an instruction(bundle): Remove +/// killed-uses, add defs. This is the not recommended way, because it depends +/// on accurate kill flags. If possible use stepBackward() instead of this +/// function. +void LivePhysRegs::stepForward(const MachineInstr &MI, + SmallVectorImpl<std::pair<unsigned, const MachineOperand*>> &Clobbers) { + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + // Note, dead defs are still recorded. The caller should decide how to + // handle them. + Clobbers.push_back(std::make_pair(Reg, &*O)); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg); + } + } else if (O->isRegMask()) + removeRegsInMask(*O, &Clobbers); + } + + // Add defs to the set. + for (auto Reg : Clobbers) { + // Skip dead defs. They shouldn't be added to the set. + if (Reg.second->isReg() && Reg.second->isDead()) + continue; + addReg(Reg.first); + } +} + +/// Prin the currently live registers to OS. +void LivePhysRegs::print(raw_ostream &OS) const { + OS << "Live Registers:"; + if (!TRI) { + OS << " (uninitialized)\n"; + return; + } + + if (empty()) { + OS << " (empty)\n"; + return; + } + + for (const_iterator I = begin(), E = end(); I != E; ++I) + OS << " " << PrintReg(*I, TRI); + OS << "\n"; +} + +/// Dumps the currently live registers to the debug output. +void LivePhysRegs::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} + +/// Add live-in registers of basic block \p MBB to \p LiveRegs. 
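A sketch of the recommended backward computation described above: seed the set with the block's live-outs via addLiveOuts() (defined just below) and step backwards to the instruction of interest. The helper name and the pristine/CSR choice are assumptions of the example.

#include "llvm/CodeGen/LivePhysRegs.h"
using namespace llvm;

// Return true if PhysReg is live immediately before MI.
static bool isPhysRegLiveBefore(const TargetRegisterInfo &TRI, unsigned PhysReg,
                                const MachineInstr &MI) {
  const MachineBasicBlock &MBB = *MI.getParent();
  LivePhysRegs LiveRegs(&TRI);
  LiveRegs.addLiveOuts(&MBB, /*AddPristinesAndCSRs=*/true);
  // Walk from the block end towards MI; stepping over MI itself yields the
  // set that is live just before it.
  for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) {
    LiveRegs.stepBackward(*I);
    if (&*I == &MI)
      break;
  }
  return LiveRegs.contains(PhysReg);
}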
+static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { + for (const auto &LI : MBB.liveins()) + LiveRegs.addReg(LI.PhysReg); +} + +/// Add pristine registers to the given \p LiveRegs. This function removes +/// actually saved callee save registers when \p InPrologueEpilogue is false. +static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF, + const TargetRegisterInfo &TRI) { + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + if (!MFI.isCalleeSavedInfoValid()) + return; + + for (const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + LiveRegs.addReg(*CSR); + for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) + LiveRegs.removeReg(Info.getReg()); +} + +void LivePhysRegs::addLiveOuts(const MachineBasicBlock *MBB, + bool AddPristinesAndCSRs) { + if (AddPristinesAndCSRs) { + const MachineFunction &MF = *MBB->getParent(); + addPristines(*this, MF, *TRI); + if (!MBB->isReturnBlock()) { + // The return block has no successors whose live-ins we could merge + // below. So instead we add the callee saved registers manually. + for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) + addReg(*I); + } + } + + // To get the live-outs we simply merge the live-ins of all successors. + for (const MachineBasicBlock *Succ : MBB->successors()) + ::addLiveIns(*this, *Succ); +} + +void LivePhysRegs::addLiveIns(const MachineBasicBlock *MBB, + bool AddPristines) { + if (AddPristines) { + const MachineFunction &MF = *MBB->getParent(); + addPristines(*this, MF, *TRI); + } + ::addLiveIns(*this, *MBB); +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp new file mode 100644 index 0000000..c408615 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -0,0 +1,468 @@ +//===---- LiveRangeCalc.cpp - Calculate live ranges -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the LiveRangeCalc class. +// +//===----------------------------------------------------------------------===// + +#include "LiveRangeCalc.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +void LiveRangeCalc::resetLiveOutMap() { + unsigned NumBlocks = MF->getNumBlockIDs(); + Seen.clear(); + Seen.resize(NumBlocks); + Map.resize(NumBlocks); +} + +void LiveRangeCalc::reset(const MachineFunction *mf, + SlotIndexes *SI, + MachineDominatorTree *MDT, + VNInfo::Allocator *VNIA) { + MF = mf; + MRI = &MF->getRegInfo(); + Indexes = SI; + DomTree = MDT; + Alloc = VNIA; + resetLiveOutMap(); + LiveIn.clear(); +} + + +static void createDeadDef(SlotIndexes &Indexes, VNInfo::Allocator &Alloc, + LiveRange &LR, const MachineOperand &MO) { + const MachineInstr *MI = MO.getParent(); + SlotIndex DefIdx = + Indexes.getInstructionIndex(MI).getRegSlot(MO.isEarlyClobber()); + + // Create the def in LR. This may find an existing def. + LR.createDeadDef(DefIdx, Alloc); +} + +void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) { + assert(MRI && Indexes && "call reset() first"); + + // Step 1: Create minimal live segments for every definition of Reg. + // Visit all def operands. If the same instruction has multiple defs of Reg, + // createDeadDef() will deduplicate. 
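A sketch of the typical driver for reset() and calculate(): point the calculator at the function-wide analyses once, then compute one interval from scratch (roughly what LiveIntervals does when it builds a virtual register's interval). All parameter names are placeholders.

#include "LiveRangeCalc.h"
using namespace llvm;

static void computeOneInterval(LiveRangeCalc &LRCalc, const MachineFunction &MF,
                               SlotIndexes *Indexes, MachineDominatorTree *MDT,
                               VNInfo::Allocator &Alloc, LiveInterval &LI,
                               bool TrackSubRegs) {
  LRCalc.reset(&MF, Indexes, MDT, &Alloc);   // prepare the shared caches
  LRCalc.calculate(LI, TrackSubRegs);        // defs, uses, SSA construction
}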
+ const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); + unsigned Reg = LI.reg; + for (const MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { + if (!MO.isDef() && !MO.readsReg()) + continue; + + unsigned SubReg = MO.getSubReg(); + if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) { + LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg) + : MRI->getMaxLaneMaskForVReg(Reg); + + // If this is the first time we see a subregister def, initialize + // subranges by creating a copy of the main range. + if (!LI.hasSubRanges() && !LI.empty()) { + LaneBitmask ClassMask = MRI->getMaxLaneMaskForVReg(Reg); + LI.createSubRangeFrom(*Alloc, ClassMask, LI); + } + + for (LiveInterval::SubRange &S : LI.subranges()) { + // A Mask for subregs common to the existing subrange and current def. + LaneBitmask Common = S.LaneMask & Mask; + if (Common == 0) + continue; + // A Mask for subregs covered by the subrange but not the current def. + LaneBitmask LRest = S.LaneMask & ~Mask; + LiveInterval::SubRange *CommonRange; + if (LRest != 0) { + // Split current subrange into Common and LRest ranges. + S.LaneMask = LRest; + CommonRange = LI.createSubRangeFrom(*Alloc, Common, S); + } else { + assert(Common == S.LaneMask); + CommonRange = &S; + } + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, *CommonRange, MO); + Mask &= ~Common; + } + // Create a new SubRange for subregs we did not cover yet. + if (Mask != 0) { + LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask); + if (MO.isDef()) + createDeadDef(*Indexes, *Alloc, *NewRange, MO); + } + } + + // Create the def in the main liverange. We do not have to do this if + // subranges are tracked as we recreate the main range later in this case. + if (MO.isDef() && !LI.hasSubRanges()) + createDeadDef(*Indexes, *Alloc, LI, MO); + } + + // We may have created empty live ranges for partially undefined uses, we + // can't keep them because we won't find defs in them later. + LI.removeEmptySubRanges(); + + // Step 2: Extend live segments to all uses, constructing SSA form as + // necessary. + if (LI.hasSubRanges()) { + for (LiveInterval::SubRange &S : LI.subranges()) { + resetLiveOutMap(); + extendToUses(S, Reg, S.LaneMask); + } + LI.clear(); + LI.constructMainRangeFromSubranges(*Indexes, *Alloc); + } else { + resetLiveOutMap(); + extendToUses(LI, Reg, ~0u); + } +} + + +void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { + assert(MRI && Indexes && "call reset() first"); + + // Visit all def operands. If the same instruction has multiple defs of Reg, + // LR.createDeadDef() will deduplicate. + for (MachineOperand &MO : MRI->def_operands(Reg)) + createDeadDef(*Indexes, *Alloc, LR, MO); +} + + +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, + LaneBitmask Mask) { + // Visit all operands that read Reg. This may include partial defs. + const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo(); + for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) { + // Clear all kill flags. They will be reinserted after register allocation + // by LiveIntervalAnalysis::addKillFlags(). + if (MO.isUse()) + MO.setIsKill(false); + else { + // We only care about uses, but on the main range (mask ~0u) this includes + // the "virtual" reads happening for subregister defs. + if (Mask != ~0u) + continue; + } + + if (!MO.readsReg()) + continue; + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg); + // Ignore uses not covering the current subrange. 
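The subrange bookkeeping above boils down to splitting lane masks; a minimal restatement of that arithmetic (LaneBitmask is a plain integer bit set in this snapshot of the code):

#include "LiveRangeCalc.h"
using namespace llvm;

// A def covering DefMask splits a subrange with lanes SRMask into the lanes
// the def rewrites (Common) and the untouched remainder (Rest).
static void splitLaneMask(LaneBitmask SRMask, LaneBitmask DefMask,
                          LaneBitmask &Common, LaneBitmask &Rest) {
  Common = SRMask & DefMask;   // goes into the (possibly new) common subrange
  Rest   = SRMask & ~DefMask;  // stays in the original subrange, if non-zero
}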
+ if ((SubRegMask & Mask) == 0) + continue; + } + + // Determine the actual place of the use. + const MachineInstr *MI = MO.getParent(); + unsigned OpNo = (&MO - &MI->getOperand(0)); + SlotIndex UseIdx; + if (MI->isPHI()) { + assert(!MO.isDef() && "Cannot handle PHI def of partial register."); + // The actual place where a phi operand is used is the end of the pred + // MBB. PHI operands are paired: (Reg, PredMBB). + UseIdx = Indexes->getMBBEndIdx(MI->getOperand(OpNo+1).getMBB()); + } else { + // Check for early-clobber redefs. + bool isEarlyClobber = false; + unsigned DefIdx; + if (MO.isDef()) + isEarlyClobber = MO.isEarlyClobber(); + else if (MI->isRegTiedToDefOperand(OpNo, &DefIdx)) { + // FIXME: This would be a lot easier if tied early-clobber uses also + // had an early-clobber flag. + isEarlyClobber = MI->getOperand(DefIdx).isEarlyClobber(); + } + UseIdx = Indexes->getInstructionIndex(MI).getRegSlot(isEarlyClobber); + } + + // MI is reading Reg. We may have visited MI before if it happens to be + // reading Reg multiple times. That is OK, extend() is idempotent. + extend(LR, UseIdx, Reg); + } +} + + +void LiveRangeCalc::updateFromLiveIns() { + LiveRangeUpdater Updater; + for (const LiveInBlock &I : LiveIn) { + if (!I.DomNode) + continue; + MachineBasicBlock *MBB = I.DomNode->getBlock(); + assert(I.Value && "No live-in value found"); + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(MBB); + + if (I.Kill.isValid()) + // Value is killed inside this block. + End = I.Kill; + else { + // The value is live-through, update LiveOut as well. + // Defer the Domtree lookup until it is needed. + assert(Seen.test(MBB->getNumber())); + Map[MBB] = LiveOutPair(I.Value, nullptr); + } + Updater.setDest(&I.LR); + Updater.add(Start, End, I.Value); + } + LiveIn.clear(); +} + + +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) { + assert(Use.isValid() && "Invalid SlotIndex"); + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + + MachineBasicBlock *UseMBB = Indexes->getMBBFromIndex(Use.getPrevSlot()); + assert(UseMBB && "No MBB at Use"); + + // Is there a def in the same MBB we can extend? + if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use)) + return; + + // Find the single reaching def, or determine if Use is jointly dominated by + // multiple values, and we may need to create even more phi-defs to preserve + // VNInfo SSA form. Perform a search for all predecessor blocks where we + // know the dominating VNInfo. + if (findReachingDefs(LR, *UseMBB, Use, PhysReg)) + return; + + // When there were multiple different values, we may need new PHIs. + calculateValues(); +} + + +// This function is called by a client after using the low-level API to add +// live-out and live-in blocks. The unique value optimization is not +// available, SplitEditor::transferValues handles that case directly anyway. +void LiveRangeCalc::calculateValues() { + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + updateSSA(); + updateFromLiveIns(); +} + + +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, + SlotIndex Use, unsigned PhysReg) { + unsigned UseMBBNum = UseMBB.getNumber(); + + // Block numbers where LR should be live-in. + SmallVector<unsigned, 16> WorkList(1, UseMBBNum); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *TheVNI = nullptr; + + // Using Seen as a visited set, perform a BFS for all reaching defs. 
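A sketch of the mid-level interface in isolation: after a pass inserts a new use of a register that is already indexed, extend() above stretches the live range to reach it, creating PHI-defs when the use is only jointly dominated by the existing defs. The helper is hypothetical and assumes LRCalc was reset() for this interval's live range.

#include "LiveRangeCalc.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
using namespace llvm;

static void coverNewUse(LiveRangeCalc &LRCalc, LiveIntervals &LIS,
                        LiveInterval &LI, MachineInstr *UseMI) {
  // The use reads the register at its register slot.
  SlotIndex UseIdx = LIS.getInstructionIndex(UseMI).getRegSlot();
  LRCalc.extend(LI, UseIdx);
}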
+ for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); + +#ifndef NDEBUG + if (MBB->pred_empty()) { + MBB->getParent()->verify(); + errs() << "Use of " << PrintReg(PhysReg) + << " does not have a corresponding definition on every path:\n"; + const MachineInstr *MI = Indexes->getInstructionFromIndex(Use); + if (MI != nullptr) + errs() << Use << " " << *MI; + llvm_unreachable("Use not jointly dominated by defs."); + } + + if (TargetRegisterInfo::isPhysicalRegister(PhysReg) && + !MBB->isLiveIn(PhysReg)) { + MBB->getParent()->verify(); + errs() << "The register " << PrintReg(PhysReg) + << " needs to be live in to BB#" << MBB->getNumber() + << ", but is missing from the live-in list.\n"; + llvm_unreachable("Invalid global physical register"); + } +#endif + + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + MachineBasicBlock *Pred = *PI; + + // Is this a known live-out block? + if (Seen.test(Pred->getNumber())) { + if (VNInfo *VNI = Map[Pred].first) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + } + continue; + } + + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(Pred); + + // First time we see Pred. Try to determine the live-out value, but set + // it as null if Pred is live-through with an unknown value. + VNInfo *VNI = LR.extendInBlock(Start, End); + setLiveOutValue(Pred, VNI); + if (VNI) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + continue; + } + + // No, we need a live-in value for Pred as well + if (Pred != &UseMBB) + WorkList.push_back(Pred->getNumber()); + else + // Loopback to UseMBB, so value is really live through. + Use = SlotIndex(); + } + } + + LiveIn.clear(); + + // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but + // neither require it. Skip the sorting overhead for small updates. + if (WorkList.size() > 4) + array_pod_sort(WorkList.begin(), WorkList.end()); + + // If a unique reaching def was found, blit in the live ranges immediately. + if (UniqueVNI) { + LiveRangeUpdater Updater(&LR); + for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(*I); + // Trim the live range in UseMBB. + if (*I == UseMBBNum && Use.isValid()) + End = Use; + else + Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr); + Updater.add(Start, End, TheVNI); + } + return true; + } + + // Multiple values were found, so transfer the work list to the LiveIn array + // where UpdateSSA will use it as a work list. + LiveIn.reserve(WorkList.size()); + for (SmallVectorImpl<unsigned>::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + addLiveInBlock(LR, DomTree->getNode(MBB)); + if (MBB == &UseMBB) + LiveIn.back().Kill = Use; + } + + return false; +} + + +// This is essentially the same iterative algorithm that SSAUpdater uses, +// except we already have a dominator tree, so we don't have to recompute it. +void LiveRangeCalc::updateSSA() { + assert(Indexes && "Missing SlotIndexes"); + assert(DomTree && "Missing dominator tree"); + + // Interate until convergence. + unsigned Changes; + do { + Changes = 0; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. 
+ for (LiveInBlock &I : LiveIn) { + MachineDomTreeNode *Node = I.DomNode; + // Skip block if the live-in value has already been determined. + if (!Node) + continue; + MachineBasicBlock *MBB = Node->getBlock(); + MachineDomTreeNode *IDom = Node->getIDom(); + LiveOutPair IDomValue; + + // We need a live-in value to a block with no immediate dominator? + // This is probably an unreachable block that has survived somehow. + bool needPHI = !IDom || !Seen.test(IDom->getBlock()->getNumber()); + + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. + if (!needPHI) { + IDomValue = Map[IDom->getBlock()]; + + // Cache the DomTree node that defined the value. + if (IDomValue.first && !IDomValue.second) + Map[IDom->getBlock()].second = IDomValue.second = + DomTree->getNode(Indexes->getMBBFromIndex(IDomValue.first->def)); + + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + LiveOutPair &Value = Map[*PI]; + if (!Value.first || Value.first == IDomValue.first) + continue; + + // Cache the DomTree node that defined the value. + if (!Value.second) + Value.second = + DomTree->getNode(Indexes->getMBBFromIndex(Value.first->def)); + + // This predecessor is carrying something other than IDomValue. + // It could be because IDomValue hasn't propagated yet, or it could be + // because MBB is in the dominance frontier of that value. + if (DomTree->dominates(IDom, Value.second)) { + needPHI = true; + break; + } + } + } + + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOut indicates a foreign or missing value. + LiveOutPair &LOP = Map[MBB]; + + // Create a phi-def if required. + if (needPHI) { + ++Changes; + assert(Alloc && "Need VNInfo allocator to create PHI-defs"); + SlotIndex Start, End; + std::tie(Start, End) = Indexes->getMBBRange(MBB); + LiveRange &LR = I.LR; + VNInfo *VNI = LR.getNextValue(Start, *Alloc); + I.Value = VNI; + // This block is done, we know the final value. + I.DomNode = nullptr; + + // Add liveness since updateFromLiveIns now skips this node. + if (I.Kill.isValid()) + LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI)); + else { + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + LOP = LiveOutPair(VNI, Node); + } + } else if (IDomValue.first) { + // No phi-def here. Remember incoming value. + I.Value = IDomValue.first; + + // If the IDomValue is killed in the block, don't propagate through. + if (I.Kill.isValid()) + continue; + + // Propagate IDomValue if it isn't killed: + // MBB is live-out and doesn't define its own value. + if (LOP.first == IDomValue.first) + continue; + ++Changes; + LOP = IDomValue; + } + } + } while (Changes); +} diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h new file mode 100644 index 0000000..ff38c68 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -0,0 +1,243 @@ +//===---- LiveRangeCalc.h - Calculate live ranges ---------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeCalc class can be used to compute live ranges from scratch. 
It +// caches information about values in the CFG to speed up repeated operations +// on the same live range. The cache can be shared by non-overlapping live +// ranges. SplitKit uses that when computing the live range of split products. +// +// A low-level interface is available to clients that know where a variable is +// live, but don't know which value it has as every point. LiveRangeCalc will +// propagate values down the dominator tree, and even insert PHI-defs where +// needed. SplitKit uses this faster interface when possible. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H +#define LLVM_LIB_CODEGEN_LIVERANGECALC_H + +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/CodeGen/LiveInterval.h" + +namespace llvm { + +/// Forward declarations for MachineDominators.h: +class MachineDominatorTree; +template <class NodeT> class DomTreeNodeBase; +typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode; + +class LiveRangeCalc { + const MachineFunction *MF; + const MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + MachineDominatorTree *DomTree; + VNInfo::Allocator *Alloc; + + /// LiveOutPair - A value and the block that defined it. The domtree node is + /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)]. + typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair; + + /// LiveOutMap - Map basic blocks to the value leaving the block. + typedef IndexedMap<LiveOutPair, MBB2NumberFunctor> LiveOutMap; + + /// Bit vector of active entries in LiveOut, also used as a visited set by + /// findReachingDefs. One entry per basic block, indexed by block number. + /// This is kept as a separate bit vector because it can be cleared quickly + /// when switching live ranges. + BitVector Seen; + + /// Map each basic block where a live range is live out to the live-out value + /// and its defining block. + /// + /// For every basic block, MBB, one of these conditions shall be true: + /// + /// 1. !Seen.count(MBB->getNumber()) + /// Blocks without a Seen bit are ignored. + /// 2. LiveOut[MBB].second.getNode() == MBB + /// The live-out value is defined in MBB. + /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB] + /// The live-out value passses through MBB. All predecessors must carry + /// the same value. + /// + /// The domtree node may be null, it can be computed. + /// + /// The map can be shared by multiple live ranges as long as no two are + /// live-out of the same block. + LiveOutMap Map; + + /// LiveInBlock - Information about a basic block where a live range is known + /// to be live-in, but the value has not yet been determined. + struct LiveInBlock { + // The live range set that is live-in to this block. The algorithms can + // handle multiple non-overlapping live ranges simultaneously. + LiveRange &LR; + + // DomNode - Dominator tree node for the block. + // Cleared when the final value has been determined and LI has been updated. + MachineDomTreeNode *DomNode; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. When the final value has been + // determined, the range from the block start to Kill will be added to LI. + SlotIndex Kill; + + // Live-in value filled in by updateSSA once it is known. 
+ VNInfo *Value; + + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill), Value(nullptr) {} + }; + + /// LiveIn - Work list of blocks where the live-in value has yet to be + /// determined. This list is typically computed by findReachingDefs() and + /// used as a work list by updateSSA(). The low-level interface may also be + /// used to add entries directly. + SmallVector<LiveInBlock, 16> LiveIn; + + /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can + /// reach it. + /// + /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB + /// are added to @p LR, and the function returns true. + /// + /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be + /// live in are added to the LiveIn array, and the function returns false. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB, + SlotIndex Kill, unsigned PhysReg); + + /// updateSSA - Compute the values that will be live in to all requested + /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. + /// + /// Every live-in block must be jointly dominated by the added live-out + /// blocks. No values are read from the live ranges. + void updateSSA(); + + /// Transfer information from the LiveIn vector to the live ranges and update + /// the given @p LiveOuts. + void updateFromLiveIns(); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask); + + /// Reset Map and Seen fields. + void resetLiveOutMap(); + +public: + LiveRangeCalc() : MF(nullptr), MRI(nullptr), Indexes(nullptr), + DomTree(nullptr), Alloc(nullptr) {} + + //===--------------------------------------------------------------------===// + // High-level interface. + //===--------------------------------------------------------------------===// + // + // Calculate live ranges from scratch. + // + + /// reset - Prepare caches for a new set of non-overlapping live ranges. The + /// caches must be reset before attempting calculations with a live range + /// that may overlap a previously computed live range, and before the first + /// live range in a function. If live ranges are not known to be + /// non-overlapping, call reset before each. + void reset(const MachineFunction *MF, + SlotIndexes*, + MachineDominatorTree*, + VNInfo::Allocator*); + + //===--------------------------------------------------------------------===// + // Mid-level interface. + //===--------------------------------------------------------------------===// + // + // Modify existing live ranges. + // + + /// Extend the live range of @p LR to reach @p Use. + /// + /// The existing values in @p LR must be live so they jointly dominate @p Use. + /// If @p Use is not dominated by a single existing value, PHI-defs are + /// inserted as required to preserve SSA form. + /// + /// PhysReg, when set, is used to verify live-in lists on basic blocks. + void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0); + + /// createDeadDefs - Create a dead def in LI for every def operand of Reg. + /// Each instruction defining Reg gets a new VNInfo with a corresponding + /// minimal live range. 
+ void createDeadDefs(LiveRange &LR, unsigned Reg); + + /// Extend the live range of @p LR to reach all uses of Reg. + /// + /// All uses must be jointly dominated by existing liveness. PHI-defs are + /// inserted as needed to preserve SSA form. + void extendToUses(LiveRange &LR, unsigned PhysReg) { + extendToUses(LR, PhysReg, ~0u); + } + + /// Calculates liveness for the register specified in live interval @p LI. + /// Creates subregister live ranges as needed if subreg liveness tracking is + /// enabled. + void calculate(LiveInterval &LI, bool TrackSubRegs); + + //===--------------------------------------------------------------------===// + // Low-level interface. + //===--------------------------------------------------------------------===// + // + // These functions can be used to compute live ranges where the live-in and + // live-out blocks are already known, but the SSA value in each block is + // unknown. + // + // After calling reset(), add known live-out values and known live-in blocks. + // Then call calculateValues() to compute the actual value that is + // live-in to each block, and add liveness to the live ranges. + // + + /// setLiveOutValue - Indicate that VNI is live out from MBB. The + /// calculateValues() function will not add liveness for MBB, the caller + /// should take care of that. + /// + /// VNI may be null only if MBB is a live-through block also passed to + /// addLiveInBlock(). + void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) { + Seen.set(MBB->getNumber()); + Map[MBB] = LiveOutPair(VNI, nullptr); + } + + /// addLiveInBlock - Add a block with an unknown live-in value. This + /// function can only be called once per basic block. Once the live-in value + /// has been determined, calculateValues() will add liveness to LI. + /// + /// @param LR The live range that is live-in to the block. + /// @param DomNode The domtree node for the block. + /// @param Kill Index in block where LI is killed. If the value is + /// live-through, set Kill = SLotIndex() and also call + /// setLiveOutValue(MBB, 0). + void addLiveInBlock(LiveRange &LR, + MachineDomTreeNode *DomNode, + SlotIndex Kill = SlotIndex()) { + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); + } + + /// calculateValues - Calculate the value that will be live-in to each block + /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA + /// form. Add liveness to all live-in blocks up to the Kill point, or the + /// whole block for live-through blocks. + /// + /// Every predecessor of a live-in block must have been given a value with + /// setLiveOutValue, the value may be null for live-trough blocks. + void calculateValues(); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp new file mode 100644 index 0000000..5ce364a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -0,0 +1,418 @@ +//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The LiveRangeEdit class represents changes done to a virtual register when it +// is spilled or split. 
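The low-level interface declared above is meant to be driven in a fixed order; a compact sketch of that protocol with placeholder blocks and values (a real client such as SplitKit supplies them from its own bookkeeping):

#include "LiveRangeCalc.h"
#include "llvm/CodeGen/MachineDominators.h"
using namespace llvm;

static void lowLevelProtocol(LiveRangeCalc &LRCalc, LiveRange &LR,
                             MachineBasicBlock *DefMBB, VNInfo *DefVNI,
                             MachineBasicBlock *UseMBB,
                             MachineDominatorTree &MDT, SlotIndex Kill) {
  LRCalc.setLiveOutValue(DefMBB, DefVNI);                // DefVNI leaves DefMBB
  LRCalc.addLiveInBlock(LR, MDT.getNode(UseMBB), Kill);  // LR is live-in here
  LRCalc.calculateValues();                              // adds PHI-defs and
                                                         // fills in liveness
}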
+//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumDCEDeleted, "Number of instructions deleted by DCE"); +STATISTIC(NumDCEFoldedLoads, "Number of single use loads folded after DCE"); +STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); + +void LiveRangeEdit::Delegate::anchor() { } + +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + LiveInterval &LI = LIS.createEmptyInterval(VReg); + return LI; +} + +unsigned LiveRangeEdit::createFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + return VReg; +} + +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, + const MachineInstr *DefMI, + AliasAnalysis *aa) { + assert(DefMI && "Missing instruction"); + ScannedRemattable = true; + if (!TII.isTriviallyReMaterializable(DefMI, aa)) + return false; + Remattable.insert(VNI); + return true; +} + +void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) { + for (VNInfo *VNI : getParent().valnos) { + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = LIS.getInstructionFromIndex(VNI->def); + if (!DefMI) + continue; + checkRematerializable(VNI, DefMI, aa); + } + ScannedRemattable = true; +} + +bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { + if (!ScannedRemattable) + scanRemattable(aa); + return !Remattable.empty(); +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) const { + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || !MO.readsReg()) + continue; + + // We can't remat physreg uses, unless it is a constant. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent())) + continue; + return false; + } + + LiveInterval &li = LIS.getInterval(MO.getReg()); + const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + + // Don't allow rematerialization immediately after the original def. + // It would be incorrect if OrigMI redefines the register. + // See PR14098. + if (SlotIndex::isSameInstr(OrigIdx, UseIdx)) + return false; + + if (OVNI != li.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +bool LiveRangeEdit::canRematerializeAt(Remat &RM, + SlotIndex UseIdx, + bool cheapAsAMove) { + assert(ScannedRemattable && "Call anyRematerializable first"); + + // Use scanRemattable info. + if (!Remattable.count(RM.ParentVNI)) + return false; + + // No defining instruction provided. 
+ SlotIndex DefIdx; + if (RM.OrigMI) + DefIdx = LIS.getInstructionIndex(RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = LIS.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } + + // If only cheap remats were requested, bail out early. + if (cheapAsAMove && !TII.isAsCheapAsAMove(RM.OrigMI)) + return false; + + // Verify that all used registers are available with the same values. + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx)) + return false; + + return true; +} + +SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, + const Remat &RM, + const TargetRegisterInfo &tri, + bool Late) { + assert(RM.OrigMI && "Invalid remat"); + TII.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); + Rematted.insert(RM.ParentVNI); + return LIS.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getRegSlot(); +} + +void LiveRangeEdit::eraseVirtReg(unsigned Reg) { + if (TheDelegate && TheDelegate->LRE_CanEraseVirtReg(Reg)) + LIS.removeInterval(Reg); +} + +bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, + SmallVectorImpl<MachineInstr*> &Dead) { + MachineInstr *DefMI = nullptr, *UseMI = nullptr; + + // Check that there is a single def and a single use. + for (MachineOperand &MO : MRI.reg_nodbg_operands(LI->reg)) { + MachineInstr *MI = MO.getParent(); + if (MO.isDef()) { + if (DefMI && DefMI != MI) + return false; + if (!MI->canFoldAsLoad()) + return false; + DefMI = MI; + } else if (!MO.isUndef()) { + if (UseMI && UseMI != MI) + return false; + // FIXME: Targets don't know how to fold subreg uses. + if (MO.getSubReg()) + return false; + UseMI = MI; + } + } + if (!DefMI || !UseMI) + return false; + + // Since we're moving the DefMI load, make sure we're not extending any live + // ranges. + if (!allUsesAvailableAt(DefMI, + LIS.getInstructionIndex(DefMI), + LIS.getInstructionIndex(UseMI))) + return false; + + // We also need to make sure it is safe to move the load. + // Assume there are stores between DefMI and UseMI. + bool SawStore = true; + if (!DefMI->isSafeToMove(nullptr, SawStore)) + return false; + + DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); + + SmallVector<unsigned, 8> Ops; + if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + return false; + + MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + if (!FoldMI) + return false; + DEBUG(dbgs() << " folded: " << *FoldMI); + LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + UseMI->eraseFromParent(); + DefMI->addRegisterDead(LI->reg, nullptr); + Dead.push_back(DefMI); + ++NumDCEFoldedLoads; + return true; +} + +bool LiveRangeEdit::useIsKill(const LiveInterval &LI, + const MachineOperand &MO) const { + const MachineInstr *MI = MO.getParent(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (LI.Query(Idx).isKill()) + return true; + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned SubReg = MO.getSubReg(); + LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg); + for (const LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill()) + return true; + } + return false; +} + +/// Find all live intervals that need to shrink, then remove the instruction. 
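A sketch of the rematerialization hand-shake these functions support, in the order a spiller would use them; Edit, ParentVNI, UseMI and NewVReg are placeholders supplied by the caller:

#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
using namespace llvm;

static bool tryRematBeforeUse(LiveRangeEdit &Edit, LiveIntervals &LIS,
                              const TargetRegisterInfo &TRI, AliasAnalysis *AA,
                              VNInfo *ParentVNI, MachineInstr *UseMI,
                              unsigned NewVReg) {
  if (!Edit.anyRematerializable(AA))
    return false;
  LiveRangeEdit::Remat RM(ParentVNI);
  SlotIndex UseIdx = LIS.getInstructionIndex(UseMI);
  if (!Edit.canRematerializeAt(RM, UseIdx, /*cheapAsAMove=*/false))
    return false;
  // Re-emit the original def just before the use; the caller still has to
  // extend NewVReg's live range from the new def to UseIdx.
  Edit.rematerializeAt(*UseMI->getParent(), UseMI, NewVReg, RM, TRI,
                       /*Late=*/false);
  return true;
}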
+void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } + + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(nullptr, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } + + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector<unsigned, 8> RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) + LIS.removePhysRegDefAt(Reg, Idx); + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if ((MI->readsVirtualRegister(Reg) && (MI->isCopy() || MOI->isDef())) || + (MOI->readsReg() && (MRI.hasOneNonDBGUse(Reg) || useIsKill(LI, *MOI)))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (TheDelegate && LI.getVNInfoAt(Idx) != nullptr) + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LIS.removeVRegDefAt(LI, Idx); + if (LI.empty()) + RegsToErase.push_back(Reg); + } + } + + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); + } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be <undef> + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead, + ArrayRef<unsigned> RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. 
+ while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); + + if (ToShrink.empty()) + break; + + // Shrink just one live interval. Then delete new dead defs. + LiveInterval *LI = ToShrink.back(); + ToShrink.pop_back(); + if (foldAsLoad(LI, Dead)) + continue; + unsigned VReg = LI->reg; + if (TheDelegate) + TheDelegate->LRE_WillShrinkVirtReg(VReg); + if (!LIS.shrinkToUses(LI, &Dead)) + continue; + + // Don't create new intervals for a register being spilled. + // The new intervals would have to be spilled anyway so its not worth it. + // Also they currently aren't spilled so creating them and not spilling + // them results in incorrect code. + bool BeingSpilled = false; + for (unsigned i = 0, e = RegsBeingSpilled.size(); i != e; ++i) { + if (VReg == RegsBeingSpilled[i]) { + BeingSpilled = true; + break; + } + } + + if (BeingSpilled) continue; + + // LI may have been separated, create new intervals. + LI->RenumberValues(); + SmallVector<LiveInterval*, 8> SplitLIs; + LIS.splitSeparateComponents(*LI, SplitLIs); + if (!SplitLIs.empty()) + ++NumFracRanges; + + unsigned Original = VRM ? VRM->getOriginal(VReg) : 0; + for (const LiveInterval *SplitLI : SplitLIs) { + // If LI is an original interval that hasn't been split yet, make the new + // intervals their own originals instead of referring to LI. The original + // interval must contain all the split products, and LI doesn't. + if (Original != VReg && Original != 0) + VRM->setIsSplitFromReg(SplitLI->reg, Original); + if (TheDelegate) + TheDelegate->LRE_DidCloneVirtReg(SplitLI->reg, VReg); + } + } +} + +// Keep track of new virtual registers created via +// MachineRegisterInfo::createVirtualRegister. +void +LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) +{ + if (VRM) + VRM->grow(); + + NewRegs.push_back(VReg); +} + +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, VRM, Loops, MBFI); + for (unsigned I = 0, Size = size(); I < Size; ++I) { + LiveInterval &LI = LIS.getInterval(get(I)); + if (MRI.recomputeRegClass(LI.reg)) + DEBUG({ + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + dbgs() << "Inflated " << PrintReg(LI.reg) << " to " + << TRI->getRegClassName(MRI.getRegClass(LI.reg)) << '\n'; + }); + VRAI.calculateSpillWeightAndHint(LI); + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp new file mode 100644 index 0000000..7ee87c1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -0,0 +1,197 @@ +//===-- LiveRegMatrix.cpp - Track register interference -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the LiveRegMatrix analysis pass. 
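+// The pass keeps one LiveIntervalUnion per target register unit and lets the
+// register allocator check a (virtual register, physical register) pair for
+// regmask, fixed reg-unit, and virtual register interference before making an
+// assignment.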
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "RegisterCoalescer.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumAssigned , "Number of registers assigned"); +STATISTIC(NumUnassigned , "Number of registers unassigned"); + +char LiveRegMatrix::ID = 0; +INITIALIZE_PASS_BEGIN(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(LiveRegMatrix, "liveregmatrix", + "Live Register Matrix", false, false) + +LiveRegMatrix::LiveRegMatrix() : MachineFunctionPass(ID), + UserTag(0), RegMaskTag(0), RegMaskVirtReg(0) {} + +void LiveRegMatrix::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<LiveIntervals>(); + AU.addRequiredTransitive<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool LiveRegMatrix::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); + + unsigned NumRegUnits = TRI->getNumRegUnits(); + if (NumRegUnits != Matrix.size()) + Queries.reset(new LiveIntervalUnion::Query[NumRegUnits]); + Matrix.init(LIUAlloc, NumRegUnits); + + // Make sure no stale queries get reused. + invalidateVirtRegs(); + return false; +} + +void LiveRegMatrix::releaseMemory() { + for (unsigned i = 0, e = Matrix.size(); i != e; ++i) { + Matrix[i].clear(); + // No need to clear Queries here, since LiveIntervalUnion::Query doesn't + // have anything important to clear and LiveRegMatrix's runOnFunction() + // does a std::unique_ptr::reset anyways. 
+ } +} + +template<typename Callable> +bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval, + unsigned PhysReg, Callable Func) { + if (VRegInterval.hasSubRanges()) { + for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + unsigned Unit = (*Units).first; + LaneBitmask Mask = (*Units).second; + for (LiveInterval::SubRange &S : VRegInterval.subranges()) { + if (S.LaneMask & Mask) { + if (Func(Unit, S)) + return true; + break; + } + } + } + } else { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (Func(*Units, VRegInterval)) + return true; + } + } + return false; +} + +void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) { + DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI) + << " to " << PrintReg(PhysReg, TRI) << ':'); + assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); + VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << ' ' << Range); + Matrix[Unit].unify(VirtReg, Range); + return false; + }); + + ++NumAssigned; + DEBUG(dbgs() << '\n'); +} + +void LiveRegMatrix::unassign(LiveInterval &VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg.reg); + DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI) + << " from " << PrintReg(PhysReg, TRI) << ':'); + VRM->clearVirt(VirtReg.reg); + + foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI)); + Matrix[Unit].extract(VirtReg, Range); + return false; + }); + + ++NumUnassigned; + DEBUG(dbgs() << '\n'); +} + +bool LiveRegMatrix::isPhysRegUsed(unsigned PhysReg) const { + for (MCRegUnitIterator Unit(PhysReg, TRI); Unit.isValid(); ++Unit) { + if (!Matrix[*Unit].empty()) + return true; + } + return false; +} + +bool LiveRegMatrix::checkRegMaskInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + // Check if the cached information is valid. + // The same BitVector can be reused for all PhysRegs. + // We could cache multiple VirtRegs if it becomes necessary. + if (RegMaskVirtReg != VirtReg.reg || RegMaskTag != UserTag) { + RegMaskVirtReg = VirtReg.reg; + RegMaskTag = UserTag; + RegMaskUsable.clear(); + LIS->checkRegMaskInterference(VirtReg, RegMaskUsable); + } + + // The BitVector is indexed by PhysReg, not register unit. + // Regmask interference is more fine grained than regunits. + // For example, a Win64 call can clobber %ymm8 yet preserve %xmm8. + return !RegMaskUsable.empty() && (!PhysReg || !RegMaskUsable.test(PhysReg)); +} + +bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, + unsigned PhysReg) { + if (VirtReg.empty()) + return false; + CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); + + bool Result = foreachUnit(TRI, VirtReg, PhysReg, [&](unsigned Unit, + const LiveRange &Range) { + const LiveRange &UnitRange = LIS->getRegUnit(Unit); + return Range.overlaps(UnitRange, CP, *LIS->getSlotIndexes()); + }); + return Result; +} + +LiveIntervalUnion::Query &LiveRegMatrix::query(LiveInterval &VirtReg, + unsigned RegUnit) { + LiveIntervalUnion::Query &Q = Queries[RegUnit]; + Q.init(UserTag, &VirtReg, &Matrix[RegUnit]); + return Q; +} + +LiveRegMatrix::InterferenceKind +LiveRegMatrix::checkInterference(LiveInterval &VirtReg, unsigned PhysReg) { + if (VirtReg.empty()) + return IK_Free; + + // Regmask interference is the fastest check. 
+ if (checkRegMaskInterference(VirtReg, PhysReg)) + return IK_RegMask; + + // Check for fixed interference. + if (checkRegUnitInterference(VirtReg, PhysReg)) + return IK_RegUnit; + + // Check the matrix for virtual register interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (query(VirtReg, *Units).checkInterference()) + return IK_VirtReg; + + return IK_Free; +} diff --git a/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp new file mode 100644 index 0000000..5c9c679 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveStackAnalysis.cpp @@ -0,0 +1,90 @@ +//===-- LiveStackAnalysis.cpp - Live Stack Slot Analysis ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the live stack slot analysis pass. It is analogous to +// live interval analysis except it's analyzing liveness of stack slots rather +// than registers. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <limits> +using namespace llvm; + +#define DEBUG_TYPE "livestacks" + +char LiveStacks::ID = 0; +INITIALIZE_PASS_BEGIN(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_END(LiveStacks, "livestacks", + "Live Stack Slot Analysis", false, false) + +char &llvm::LiveStacksID = LiveStacks::ID; + +void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addPreserved<SlotIndexes>(); + AU.addRequiredTransitive<SlotIndexes>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void LiveStacks::releaseMemory() { + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); + S2IMap.clear(); + S2RCMap.clear(); +} + +bool LiveStacks::runOnMachineFunction(MachineFunction &MF) { + TRI = MF.getSubtarget().getRegisterInfo(); + // FIXME: No analysis is being done right now. We are relying on the + // register allocators to provide the information. + return false; +} + +LiveInterval & +LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) { + assert(Slot >= 0 && "Spill slot indice must be >= 0"); + SS2IntervalMap::iterator I = S2IMap.find(Slot); + if (I == S2IMap.end()) { + I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot), + std::forward_as_tuple( + TargetRegisterInfo::index2StackSlot(Slot), 0.0F)) + .first; + S2RCMap.insert(std::make_pair(Slot, RC)); + } else { + // Use the largest common subclass register class. + const TargetRegisterClass *OldRC = S2RCMap[Slot]; + S2RCMap[Slot] = TRI->getCommonSubClass(OldRC, RC); + } + return I->second; +} + +/// print - Implement the dump method. 
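+/// Emits the "********** INTERVALS **********" banner followed by one entry
+/// per tracked spill slot: the slot's live interval and, in brackets, the
+/// register class recorded for it (or "[Unknown]" if none is known).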
+void LiveStacks::print(raw_ostream &OS, const Module*) const { + + OS << "********** INTERVALS **********\n"; + for (const_iterator I = begin(), E = end(); I != E; ++I) { + I->second.print(OS); + int Slot = I->first; + const TargetRegisterClass *RC = getIntervalRegClass(Slot); + if (RC) + OS << " [" << TRI->getRegClassName(RC) << "]\n"; + else + OS << " [Unknown]\n"; + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp new file mode 100644 index 0000000..06b86d8 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -0,0 +1,809 @@ +//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveVariable analysis pass. For each machine +// instruction in the function, this pass calculates the set of registers that +// are immediately dead after the instruction (i.e., the instruction calculates +// the value, but it is never used) and the set of registers that are used by +// the instruction, but are never used after the instruction (i.e., they are +// killed). +// +// This class computes live variables using a sparse implementation based on +// the machine code SSA form. This class computes live variable information for +// each virtual and _register allocatable_ physical register in a function. It +// uses the dominance properties of SSA form to efficiently compute live +// variables for virtual registers, and assumes that physical registers are only +// live within a single basic block (allowing it to do a single local analysis +// to resolve physical register lifetimes in each basic block). If a physical +// register is not register allocatable, it is not tracked. This is useful for +// things like the stack pointer and condition codes. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <algorithm> +using namespace llvm; + +char LiveVariables::ID = 0; +char &llvm::LiveVariablesID = LiveVariables::ID; +INITIALIZE_PASS_BEGIN(LiveVariables, "livevars", + "Live Variable Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim) +INITIALIZE_PASS_END(LiveVariables, "livevars", + "Live Variable Analysis", false, false) + + +void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequiredID(UnreachableMachineBlockElimID); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr * +LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + if (Kills[i]->getParent() == MBB) + return Kills[i]; + return nullptr; +} + +void LiveVariables::VarInfo::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " Alive in blocks: "; + for (SparseBitVector<>::iterator I = AliveBlocks.begin(), + E = AliveBlocks.end(); I != E; ++I) + dbgs() << *I << ", "; + dbgs() << "\n Killed by:"; + if (Kills.empty()) + dbgs() << " No instructions.\n"; + else { + for (unsigned i = 0, e = Kills.size(); i != e; ++i) + dbgs() << "\n #" << i << ": " << *Kills[i]; + dbgs() << "\n"; + } +#endif +} + +/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg. +LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) { + assert(TargetRegisterInfo::isVirtualRegister(RegIdx) && + "getVarInfo: not a virtual register!"); + VirtRegInfo.grow(RegIdx); + return VirtRegInfo[RegIdx]; +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, + MachineBasicBlock *DefBlock, + MachineBasicBlock *MBB, + std::vector<MachineBasicBlock*> &WorkList) { + unsigned BBNum = MBB->getNumber(); + + // Check to see if this basic block is one of the killing blocks. If so, + // remove it. 
+ for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) + if (VRInfo.Kills[i]->getParent() == MBB) { + VRInfo.Kills.erase(VRInfo.Kills.begin()+i); // Erase entry + break; + } + + if (MBB == DefBlock) return; // Terminate recursion + + if (VRInfo.AliveBlocks.test(BBNum)) + return; // We already know the block is live + + // Mark the variable known alive in this bb + VRInfo.AliveBlocks.set(BBNum); + + assert(MBB != &MF->front() && "Can't find reaching def for virtreg"); + WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); +} + +void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, + MachineBasicBlock *DefBlock, + MachineBasicBlock *MBB) { + std::vector<MachineBasicBlock*> WorkList; + MarkVirtRegAliveInBlock(VRInfo, DefBlock, MBB, WorkList); + + while (!WorkList.empty()) { + MachineBasicBlock *Pred = WorkList.back(); + WorkList.pop_back(); + MarkVirtRegAliveInBlock(VRInfo, DefBlock, Pred, WorkList); + } +} + +void LiveVariables::HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, + MachineInstr *MI) { + assert(MRI->getVRegDef(reg) && "Register use before def!"); + + unsigned BBNum = MBB->getNumber(); + + VarInfo& VRInfo = getVarInfo(reg); + + // Check to see if this basic block is already a kill block. + if (!VRInfo.Kills.empty() && VRInfo.Kills.back()->getParent() == MBB) { + // Yes, this register is killed in this basic block already. Increase the + // live range by updating the kill instruction. + VRInfo.Kills.back() = MI; + return; + } + +#ifndef NDEBUG + for (unsigned i = 0, e = VRInfo.Kills.size(); i != e; ++i) + assert(VRInfo.Kills[i]->getParent() != MBB && "entry should be at end!"); +#endif + + // This situation can occur: + // + // ,------. + // | | + // | v + // | t2 = phi ... t1 ... + // | | + // | v + // | t1 = ... + // | ... = ... t1 ... + // | | + // `------' + // + // where there is a use in a PHI node that's a predecessor to the defining + // block. We don't want to mark all predecessors as having the value "alive" + // in this case. + if (MBB == MRI->getVRegDef(reg)->getParent()) return; + + // Add a new kill entry for this basic block. If this virtual register is + // already marked as alive in this basic block, that means it is alive in at + // least one of the successor blocks, it's not a kill. + if (!VRInfo.AliveBlocks.test(BBNum)) + VRInfo.Kills.push_back(MI); + + // Update all dominating blocks to mark them as "known live". + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + E = MBB->pred_end(); PI != E; ++PI) + MarkVirtRegAliveInBlock(VRInfo, MRI->getVRegDef(reg)->getParent(), *PI); +} + +void LiveVariables::HandleVirtRegDef(unsigned Reg, MachineInstr *MI) { + VarInfo &VRInfo = getVarInfo(Reg); + + if (VRInfo.AliveBlocks.empty()) + // If vr is not alive in any block, then defaults to dead. + VRInfo.Kills.push_back(MI); +} + +/// FindLastPartialDef - Return the last partial def of the specified register. +/// Also returns the sub-registers that're defined by the instruction. 
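+/// For example (x86, purely illustrative): given "AL = ..." followed by
+/// "AH = ...", a query for AX returns the AH-defining instruction, and
+/// PartDefRegs is populated with AH plus any other sub-registers of AX that
+/// the returned instruction defines.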
+MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, + SmallSet<unsigned,4> &PartDefRegs) { + unsigned LastDefReg = 0; + unsigned LastDefDist = 0; + MachineInstr *LastDef = nullptr; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + MachineInstr *Def = PhysRegDef[SubReg]; + if (!Def) + continue; + unsigned Dist = DistanceMap[Def]; + if (Dist > LastDefDist) { + LastDefReg = SubReg; + LastDef = Def; + LastDefDist = Dist; + } + } + + if (!LastDef) + return nullptr; + + PartDefRegs.insert(LastDefReg); + for (unsigned i = 0, e = LastDef->getNumOperands(); i != e; ++i) { + MachineOperand &MO = LastDef->getOperand(i); + if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) + continue; + unsigned DefReg = MO.getReg(); + if (TRI->isSubRegister(Reg, DefReg)) { + for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + PartDefRegs.insert(*SubRegs); + } + } + return LastDef; +} + +/// HandlePhysRegUse - Turn previous partial def's into read/mod/writes. Add +/// implicit defs to a machine instruction if there was an earlier def of its +/// super-register. +void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { + MachineInstr *LastDef = PhysRegDef[Reg]; + // If there was a previous use or a "full" def all is well. + if (!LastDef && !PhysRegUse[Reg]) { + // Otherwise, the last sub-register def implicitly defines this register. + // e.g. + // AH = + // AL = ... <imp-def EAX>, <imp-kill AH> + // = AH + // ... + // = EAX + // All of the sub-registers must have been defined before the use of Reg! + SmallSet<unsigned, 4> PartDefRegs; + MachineInstr *LastPartialDef = FindLastPartialDef(Reg, PartDefRegs); + // If LastPartialDef is NULL, it must be using a livein register. + if (LastPartialDef) { + LastPartialDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); + PhysRegDef[Reg] = LastPartialDef; + SmallSet<unsigned, 8> Processed; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (Processed.count(SubReg)) + continue; + if (PartDefRegs.count(SubReg)) + continue; + // This part of Reg was defined before the last partial def. It's killed + // here. + LastPartialDef->addOperand(MachineOperand::CreateReg(SubReg, + false/*IsDef*/, + true/*IsImp*/)); + PhysRegDef[SubReg] = LastPartialDef; + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + Processed.insert(*SS); + } + } + } else if (LastDef && !PhysRegUse[Reg] && + !LastDef->findRegisterDefOperand(Reg)) + // Last def defines the super register, add an implicit def of reg. + LastDef->addOperand(MachineOperand::CreateReg(Reg, true/*IsDef*/, + true/*IsImp*/)); + + // Remember this use. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + PhysRegUse[*SubRegs] = MI; +} + +/// FindLastRefOrPartRef - Return the last reference or partial reference of +/// the specified register. +MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) + return nullptr; + + MachineInstr *LastRefOrPartRef = LastUse ? 
LastUse : LastDef; + unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; + unsigned LastPartDefDist = 0; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one. + unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) + LastPartDefDist = Dist; + } else if (MachineInstr *Use = PhysRegUse[SubReg]) { + unsigned Dist = DistanceMap[Use]; + if (Dist > LastRefOrPartRefDist) { + LastRefOrPartRefDist = Dist; + LastRefOrPartRef = Use; + } + } + } + + return LastRefOrPartRef; +} + +bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) + return false; + + MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; + unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; + // The whole register is used. + // AL = + // AH = + // + // = AX + // = AL, AX<imp-use, kill> + // AX = + // + // Or whole register is defined, but not used at all. + // AX<dead> = + // ... + // AX = + // + // Or whole register is defined, but only partly used. + // AX<dead> = AL<imp-def> + // = AL<kill> + // AX = + MachineInstr *LastPartDef = nullptr; + unsigned LastPartDefDist = 0; + SmallSet<unsigned, 8> PartUses; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one. + unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) { + LastPartDefDist = Dist; + LastPartDef = Def; + } + continue; + } + if (MachineInstr *Use = PhysRegUse[SubReg]) { + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid(); + ++SS) + PartUses.insert(*SS); + unsigned Dist = DistanceMap[Use]; + if (Dist > LastRefOrPartRefDist) { + LastRefOrPartRefDist = Dist; + LastRefOrPartRef = Use; + } + } + } + + if (!PhysRegUse[Reg]) { + // Partial uses. Mark register def dead and add implicit def of + // sub-registers which are used. + // EAX<dead> = op AL<imp-def> + // That is, EAX def is dead but AL def extends pass it. + PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true); + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (!PartUses.count(SubReg)) + continue; + bool NeedDef = true; + if (PhysRegDef[Reg] == PhysRegDef[SubReg]) { + MachineOperand *MO = PhysRegDef[Reg]->findRegisterDefOperand(SubReg); + if (MO) { + NeedDef = false; + assert(!MO->isDead()); + } + } + if (NeedDef) + PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, + true/*IsDef*/, true/*IsImp*/)); + MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg); + if (LastSubRef) + LastSubRef->addRegisterKilled(SubReg, TRI, true); + else { + LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) + PhysRegUse[*SS] = LastRefOrPartRef; + } + for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + PartUses.erase(*SS); + } + } else if (LastRefOrPartRef == PhysRegDef[Reg] && LastRefOrPartRef != MI) { + if (LastPartDef) + // The last partial def kills the register. 
+ LastPartDef->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/, true/*IsKill*/)); + else { + MachineOperand *MO = + LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI); + bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg; + // If the last reference is the last def, then it's not used at all. + // That is, unless we are currently processing the last reference itself. + LastRefOrPartRef->addRegisterDead(Reg, TRI, true); + if (NeedEC) { + // If we are adding a subreg def and the superreg def is marked early + // clobber, add an early clobber marker to the subreg def. + MO = LastRefOrPartRef->findRegisterDefOperand(Reg); + if (MO) + MO->setIsEarlyClobber(); + } + } + } else + LastRefOrPartRef->addRegisterKilled(Reg, TRI, true); + return true; +} + +void LiveVariables::HandleRegMask(const MachineOperand &MO) { + // Call HandlePhysRegKill() for all live registers clobbered by Mask. + // Clobbered registers are always dead, sp there is no need to use + // HandlePhysRegDef(). + for (unsigned Reg = 1, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) { + // Skip dead regs. + if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) + continue; + // Skip mask-preserved regs. + if (!MO.clobbersPhysReg(Reg)) + continue; + // Kill the largest clobbered super-register. + // This avoids needless implicit operands. + unsigned Super = Reg; + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + if ((PhysRegDef[*SR] || PhysRegUse[*SR]) && MO.clobbersPhysReg(*SR)) + Super = *SR; + HandlePhysRegKill(Super, nullptr); + } +} + +void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, + SmallVectorImpl<unsigned> &Defs) { + // What parts of the register are previously defined? + SmallSet<unsigned, 32> Live; + if (PhysRegDef[Reg] || PhysRegUse[Reg]) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + Live.insert(*SubRegs); + } else { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + // If a register isn't itself defined, but all parts that make up of it + // are defined, then consider it also defined. + // e.g. + // AL = + // AH = + // = AX + if (Live.count(SubReg)) + continue; + if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) + Live.insert(*SS); + } + } + } + + // Start from the largest piece, find the last time any part of the register + // is referenced. + HandlePhysRegKill(Reg, MI); + // Only some of the sub-registers are used. + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (!Live.count(SubReg)) + // Skip if this sub-register isn't defined. + continue; + HandlePhysRegKill(SubReg, MI); + } + + if (MI) + Defs.push_back(Reg); // Remember this def. +} + +void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, + SmallVectorImpl<unsigned> &Defs) { + while (!Defs.empty()) { + unsigned Reg = Defs.back(); + Defs.pop_back(); + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + PhysRegDef[SubReg] = MI; + PhysRegUse[SubReg] = nullptr; + } + } +} + +void LiveVariables::runOnInstr(MachineInstr *MI, + SmallVectorImpl<unsigned> &Defs) { + assert(!MI->isDebugValue()); + // Process all of the operands of the instruction... + unsigned NumOperandsToProcess = MI->getNumOperands(); + + // Unless it is a PHI node. 
In this case, ONLY process the DEF, not any + // of the uses. They will be handled in other basic blocks. + if (MI->isPHI()) + NumOperandsToProcess = 1; + + // Clear kill and dead markers. LV will recompute them. + SmallVector<unsigned, 4> UseRegs; + SmallVector<unsigned, 4> DefRegs; + SmallVector<unsigned, 1> RegMasks; + for (unsigned i = 0; i != NumOperandsToProcess; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + RegMasks.push_back(i); + continue; + } + if (!MO.isReg() || MO.getReg() == 0) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsKill(false); + if (MO.readsReg()) + UseRegs.push_back(MOReg); + } else /*MO.isDef()*/ { + if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) && + MRI->isReserved(MOReg))) + MO.setIsDead(false); + DefRegs.push_back(MOReg); + } + } + + MachineBasicBlock *MBB = MI->getParent(); + // Process all uses. + for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) { + unsigned MOReg = UseRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegUse(MOReg, MBB, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegUse(MOReg, MI); + } + + // Process all masked registers. (Call clobbers). + for (unsigned i = 0, e = RegMasks.size(); i != e; ++i) + HandleRegMask(MI->getOperand(RegMasks[i])); + + // Process all defs. + for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) { + unsigned MOReg = DefRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + HandleVirtRegDef(MOReg, MI); + else if (!MRI->isReserved(MOReg)) + HandlePhysRegDef(MOReg, MI, Defs); + } + UpdatePhysRegDefs(MI, Defs); +} + +void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) { + // Mark live-in registers as live-in. + SmallVector<unsigned, 4> Defs; + for (const auto &LI : MBB->liveins()) { + assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) && + "Cannot have a live-in virtual register!"); + HandlePhysRegDef(LI.PhysReg, nullptr, Defs); + } + + // Loop over all of the instructions, processing them. + DistanceMap.clear(); + unsigned Dist = 0; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + DistanceMap.insert(std::make_pair(MI, Dist++)); + + runOnInstr(MI, Defs); + } + + // Handle any virtual assignments from PHI nodes which might be at the + // bottom of this basic block. We check all of our successor blocks to see + // if they have PHI nodes, and if so, we simulate an assignment at the end + // of the current block. + if (!PHIVarInfo[MBB->getNumber()].empty()) { + SmallVectorImpl<unsigned> &VarInfoVec = PHIVarInfo[MBB->getNumber()]; + + for (SmallVectorImpl<unsigned>::iterator I = VarInfoVec.begin(), + E = VarInfoVec.end(); I != E; ++I) + // Mark it alive only in the block we are representing. + MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), + MBB); + } + + // MachineCSE may CSE instructions which write to non-allocatable physical + // registers across MBBs. Remember if any reserved register is liveout. + SmallSet<unsigned, 4> LiveOuts; + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->isEHPad()) + continue; + for (const auto &LI : SuccMBB->liveins()) { + if (!TRI->isInAllocatableClass(LI.PhysReg)) + // Ignore other live-ins, e.g. those that are live into landing pads. 
+ LiveOuts.insert(LI.PhysReg); + } + } + + // Loop over PhysRegDef / PhysRegUse, killing any registers that are + // available at the end of the basic block. + for (unsigned i = 0; i != NumRegs; ++i) + if ((PhysRegDef[i] || PhysRegUse[i]) && !LiveOuts.count(i)) + HandlePhysRegDef(i, nullptr, Defs); +} + +bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + MRI = &mf.getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + + const unsigned NumRegs = TRI->getNumRegs(); + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); + PHIVarInfo.resize(MF->getNumBlockIDs()); + PHIJoins.clear(); + + // FIXME: LiveIntervals will be updated to remove its dependence on + // LiveVariables to improve compilation time and eliminate bizarre pass + // dependencies. Until then, we can't change much in -O0. + if (!MRI->isSSA()) + report_fatal_error("regalloc=... not currently supported with -O0"); + + analyzePHINodes(mf); + + // Calculate live variable information in depth first order on the CFG of the + // function. This guarantees that we will see the definition of a virtual + // register before its uses due to dominance properties of SSA (except for PHI + // nodes, which are treated as a special case). + MachineBasicBlock *Entry = &MF->front(); + SmallPtrSet<MachineBasicBlock*,16> Visited; + + for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) { + runOnBlock(MBB, NumRegs); + + PhysRegDef.assign(NumRegs, nullptr); + PhysRegUse.assign(NumRegs, nullptr); + } + + // Convert and transfer the dead / killed information we have gathered into + // VirtRegInfo onto MI's. + for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) { + const unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j) + if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg)) + VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI); + else + VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI); + } + + // Check to make sure there are no unreachable blocks in the MC CFG for the + // function. If so, it is due to a bug in the instruction selector or some + // other part of the code generator if this happens. +#ifndef NDEBUG + for(MachineFunction::iterator i = MF->begin(), e = MF->end(); i != e; ++i) + assert(Visited.count(&*i) != 0 && "unreachable basic block found"); +#endif + + PhysRegDef.clear(); + PhysRegUse.clear(); + PHIVarInfo.clear(); + + return false; +} + +/// replaceKillInstruction - Update register kill info by replacing a kill +/// instruction with a new one. +void LiveVariables::replaceKillInstruction(unsigned Reg, MachineInstr *OldMI, + MachineInstr *NewMI) { + VarInfo &VI = getVarInfo(Reg); + std::replace(VI.Kills.begin(), VI.Kills.end(), OldMI, NewMI); +} + +/// removeVirtualRegistersKilled - Remove all killed info for the specified +/// instruction. +void LiveVariables::removeVirtualRegistersKilled(MachineInstr *MI) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isKill()) { + MO.setIsKill(false); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + bool removed = getVarInfo(Reg).removeKill(MI); + assert(removed && "kill not in register's VarInfo?"); + (void)removed; + } + } + } +} + +/// analyzePHINodes - Gather information about the PHI nodes in here. In +/// particular, we want to map the variable information of a virtual register +/// which is used in a PHI node. 
We map that to the BB the vreg is coming from. +/// +void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { + for (const auto &MBB : Fn) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + if (BBI.getOperand(i).readsReg()) + PHIVarInfo[BBI.getOperand(i + 1).getMBB()->getNumber()] + .push_back(BBI.getOperand(i).getReg()); + } +} + +bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, + unsigned Reg, + MachineRegisterInfo &MRI) { + unsigned Num = MBB.getNumber(); + + // Reg is live-through. + if (AliveBlocks.test(Num)) + return true; + + // Registers defined in MBB cannot be live in. + const MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def && Def->getParent() == &MBB) + return false; + + // Reg was not defined in MBB, was it killed here? + return findKill(&MBB); +} + +bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { + LiveVariables::VarInfo &VI = getVarInfo(Reg); + + SmallPtrSet<const MachineBasicBlock *, 8> Kills; + for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i) + Kills.insert(VI.Kills[i]->getParent()); + + // Loop over all of the successors of the basic block, checking to see if + // the value is either live in the block, or if it is killed in the block. + for (const MachineBasicBlock *SuccMBB : MBB.successors()) { + // Is it alive in this successor? + unsigned SuccIdx = SuccMBB->getNumber(); + if (VI.AliveBlocks.test(SuccIdx)) + return true; + // Or is it live because there is a use in a successor that kills it? + if (Kills.count(SuccMBB)) + return true; + } + + return false; +} + +/// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All +/// variables that are live out of DomBB will be marked as passing live through +/// BB. +void LiveVariables::addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB, + MachineBasicBlock *SuccBB) { + const unsigned NumNew = BB->getNumber(); + + SmallSet<unsigned, 16> Defs, Kills; + + MachineBasicBlock::iterator BBI = SuccBB->begin(), BBE = SuccBB->end(); + for (; BBI != BBE && BBI->isPHI(); ++BBI) { + // Record the def of the PHI node. + Defs.insert(BBI->getOperand(0).getReg()); + + // All registers used by PHI nodes in SuccBB must be live through BB. + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + if (BBI->getOperand(i+1).getMBB() == BB) + getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); + } + + // Record all vreg defs and kills of all instructions in SuccBB. + for (; BBI != BBE; ++BBI) { + for (MachineInstr::mop_iterator I = BBI->operands_begin(), + E = BBI->operands_end(); I != E; ++I) { + if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) { + if (I->isDef()) + Defs.insert(I->getReg()); + else if (I->isKill()) + Kills.insert(I->getReg()); + } + } + } + + // Update info for all live variables + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // If the Defs is defined in the successor it can't be live in BB. + if (Defs.count(Reg)) + continue; + + // If the register is either killed in or live through SuccBB it's also live + // through BB. 
+ VarInfo &VI = getVarInfo(Reg); + if (Kills.count(Reg) || VI.AliveBlocks.test(SuccBB->getNumber())) + VI.AliveBlocks.set(NumNew); + } +} diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp new file mode 100644 index 0000000..eb60005 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -0,0 +1,426 @@ +//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass assigns local frame indices to stack slots relative to one another +// and allocates additional base registers to access them when the target +// estimates they are likely to be out of range of stack pointer and frame +// pointer relative addressing. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "localstackalloc" + +STATISTIC(NumAllocations, "Number of frame indices allocated into local block"); +STATISTIC(NumBaseRegisters, "Number of virtual frame base registers allocated"); +STATISTIC(NumReplacements, "Number of frame indices references replaced"); + +namespace { + class FrameRef { + MachineBasicBlock::iterator MI; // Instr referencing the frame + int64_t LocalOffset; // Local offset of the frame idx referenced + int FrameIdx; // The frame index + public: + FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) : + MI(I), LocalOffset(Offset), FrameIdx(Idx) {} + bool operator<(const FrameRef &RHS) const { + return LocalOffset < RHS.LocalOffset; + } + MachineBasicBlock::iterator getMachineInstr() const { return MI; } + int64_t getLocalOffset() const { return LocalOffset; } + int getFrameIndex() const { return FrameIdx; } + }; + + class LocalStackSlotPass: public MachineFunctionPass { + SmallVector<int64_t,16> LocalOffsets; + /// StackObjSet - A set of stack object indexes + typedef SmallSetVector<int, 8> StackObjSet; + + void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset, + bool StackGrowsDown, unsigned &MaxAlign); + void AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign); + void calculateFrameObjectOffsets(MachineFunction &Fn); + bool insertFrameReferenceRegisters(MachineFunction &Fn); + public: + static char ID; // Pass identification, replacement for typeid + explicit 
LocalStackSlotPass() : MachineFunctionPass(ID) { + initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry()); + } + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<StackProtector>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + }; +} // end anonymous namespace + +char LocalStackSlotPass::ID = 0; +char &llvm::LocalStackSlotAllocationID = LocalStackSlotPass::ID; +INITIALIZE_PASS_BEGIN(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc", + "Local Stack Slot Allocation", false, false) + + +bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned LocalObjectCount = MFI->getObjectIndexEnd(); + + // If the target doesn't want/need this pass, or if there are no locals + // to consider, early exit. + if (!TRI->requiresVirtualBaseRegisters(MF) || LocalObjectCount == 0) + return true; + + // Make sure we have enough space to store the local offsets. + LocalOffsets.resize(MFI->getObjectIndexEnd()); + + // Lay out the local blob. + calculateFrameObjectOffsets(MF); + + // Insert virtual base registers to resolve frame index references. + bool UsedBaseRegs = insertFrameReferenceRegisters(MF); + + // Tell MFI whether any base registers were allocated. PEI will only + // want to use the local block allocations from this pass if there were any. + // Otherwise, PEI can do a bit better job of getting the alignment right + // without a hole at the start since it knows the alignment of the stack + // at the start of local allocation, and this pass doesn't. + MFI->setUseLocalStackAllocationBlock(UsedBaseRegs); + + return true; +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI, + int FrameIdx, int64_t &Offset, + bool StackGrowsDown, + unsigned &MaxAlign) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = (Offset + Align - 1) / Align * Align; + + int64_t LocalOffset = StackGrowsDown ? -Offset : Offset; + DEBUG(dbgs() << "Allocate FI(" << FrameIdx << ") to local offset " + << LocalOffset << "\n"); + // Keep the offset available for base register allocation + LocalOffsets[FrameIdx] = LocalOffset; + // And tell MFI about it for PEI to use later + MFI->mapLocalFrameObject(FrameIdx, LocalOffset); + + if (!StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + ++NumAllocations; +} + +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. 
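+/// Each object in UnassignedObjs is laid out via AdjustStackOffset and then
+/// remembered in ProtectedObjs so that the later walk over the remaining
+/// frame objects skips it.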
+void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + ProtectedObjs.insert(i); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. +/// +void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) { + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + int64_t Offset = 0; + unsigned MaxAlign = 0; + StackProtector *SP = &getAnalysis<StackProtector>(); + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> ProtectedObjs; + if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset, + StackGrowsDown, MaxAlign); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); + } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign); + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (ProtectedObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, Offset, StackGrowsDown, MaxAlign); + } + + // Remember how big this blob of stack space is + MFI->setLocalFrameSize(Offset); + MFI->setLocalFrameMaxAlign(MaxAlign); +} + +static inline bool +lookupCandidateBaseReg(unsigned BaseReg, + int64_t BaseOffset, + int64_t FrameSizeAdjust, + int64_t LocalFrameOffset, + const MachineInstr *MI, + const TargetRegisterInfo *TRI) { + // Check if the relative offset from the where the base register references + // to the target address is in range for the instruction. + int64_t Offset = FrameSizeAdjust + LocalFrameOffset - BaseOffset; + return TRI->isFrameOffsetLegal(MI, BaseReg, Offset); +} + +bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) { + // Scan the function's instructions looking for frame index references. 
+ // For each, ask the target if it wants a virtual base register for it + // based on what we can tell it about where the local will end up in the + // stack frame. If it wants one, re-use a suitable one we've previously + // allocated, or if there isn't one that fits the bill, allocate a new one + // and ask the target to create a defining instruction for it. + bool UsedBaseReg = false; + + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Collect all of the instructions in the block that reference + // a frame index. Also store the frame index referenced to ease later + // lookup. (For any insn that has more than one FI reference, we arbitrarily + // choose the first one). + SmallVector<FrameRef, 64> FrameReferenceInsns; + + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { + MachineInstr *MI = I; + + // Debug value, stackmap and patchpoint instructions can't be out of + // range, so they don't need any updates. + if (MI->isDebugValue() || + MI->getOpcode() == TargetOpcode::STATEPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) + continue; + + // For now, allocate the base register(s) within the basic block + // where they're used, and don't try to keep them around outside + // of that. It may be beneficial to try sharing them more broadly + // than that, but the increased register pressure makes that a + // tricky thing to balance. Investigate if re-materializing these + // becomes an issue. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + // Consider replacing all frame index operands that reference + // an object allocated in the local block. + if (MI->getOperand(i).isFI()) { + // Don't try this with values not in the local block. + if (!MFI->isObjectPreAllocated(MI->getOperand(i).getIndex())) + break; + int Idx = MI->getOperand(i).getIndex(); + int64_t LocalOffset = LocalOffsets[Idx]; + if (!TRI->needsFrameBaseReg(MI, LocalOffset)) + break; + FrameReferenceInsns. + push_back(FrameRef(MI, LocalOffset, Idx)); + break; + } + } + } + } + + // Sort the frame references by local offset + array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end()); + + MachineBasicBlock *Entry = &Fn.front(); + + unsigned BaseReg = 0; + int64_t BaseOffset = 0; + + // Loop through the frame references and allocate for them as necessary. + for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) { + FrameRef &FR = FrameReferenceInsns[ref]; + MachineBasicBlock::iterator I = FR.getMachineInstr(); + MachineInstr *MI = I; + int64_t LocalOffset = FR.getLocalOffset(); + int FrameIdx = FR.getFrameIndex(); + assert(MFI->isObjectPreAllocated(FrameIdx) && + "Only pre-allocated locals expected!"); + + DEBUG(dbgs() << "Considering: " << *MI); + + unsigned idx = 0; + for (unsigned f = MI->getNumOperands(); idx != f; ++idx) { + if (!MI->getOperand(idx).isFI()) + continue; + + if (FrameIdx == I->getOperand(idx).getIndex()) + break; + } + + assert(idx < MI->getNumOperands() && "Cannot find FI operand"); + + int64_t Offset = 0; + int64_t FrameSizeAdjust = StackGrowsDown ? 
MFI->getLocalFrameSize() : 0; + + DEBUG(dbgs() << " Replacing FI in: " << *MI); + + // If we have a suitable base register available, use it; otherwise + // create a new one. Note that any offset encoded in the + // instruction itself will be taken into account by the target, + // so we don't have to adjust for it here when reusing a base + // register. + if (UsedBaseReg && lookupCandidateBaseReg(BaseReg, BaseOffset, + FrameSizeAdjust, LocalOffset, MI, + TRI)) { + DEBUG(dbgs() << " Reusing base register " << BaseReg << "\n"); + // We found a register to reuse. + Offset = FrameSizeAdjust + LocalOffset - BaseOffset; + } else { + // No previously defined register was in range, so create a // new one. + + int64_t InstrOffset = TRI->getFrameIndexInstrOffset(MI, idx); + + int64_t PrevBaseOffset = BaseOffset; + BaseOffset = FrameSizeAdjust + LocalOffset + InstrOffset; + + // We'd like to avoid creating single-use virtual base registers. + // Because the FrameRefs are in sorted order, and we've already + // processed all FrameRefs before this one, just check whether or not + // the next FrameRef will be able to reuse this new register. If not, + // then don't bother creating it. + if (ref + 1 >= e || + !lookupCandidateBaseReg( + BaseReg, BaseOffset, FrameSizeAdjust, + FrameReferenceInsns[ref + 1].getLocalOffset(), + FrameReferenceInsns[ref + 1].getMachineInstr(), TRI)) { + BaseOffset = PrevBaseOffset; + continue; + } + + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetRegisterClass *RC = TRI->getPointerRegClass(*MF); + BaseReg = Fn.getRegInfo().createVirtualRegister(RC); + + DEBUG(dbgs() << " Materializing base register " << BaseReg << + " at frame local offset " << LocalOffset + InstrOffset << "\n"); + + // Tell the target to insert the instruction to initialize + // the base register. + // MachineBasicBlock::iterator InsertionPt = Entry->begin(); + TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx, + InstrOffset); + + // The base register already includes any offset specified + // by the instruction, so account for that so it doesn't get + // applied twice. + Offset = -InstrOffset; + + ++NumBaseRegisters; + UsedBaseReg = true; + } + assert(BaseReg != 0 && "Unable to allocate virtual base register!"); + + // Modify the instruction to use the new base register rather + // than the frame index operand. + TRI->resolveFrameIndex(*I, BaseReg, Offset); + DEBUG(dbgs() << "Resolved: " << *MI); + + ++NumReplacements; + } + + return UsedBaseReg; +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp new file mode 100644 index 0000000..28f9d4e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -0,0 +1,604 @@ +//===- MILexer.cpp - Machine instructions lexer implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the lexing of machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "MILexer.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include <cctype> + +using namespace llvm; + +namespace { + +/// This class provides a way to iterate and get characters from the source +/// string. 
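+/// A Cursor constructed from NoneType converts to false; the lex* helpers
+/// below return such a cursor to signal "no match here" or a lexing error.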
+class Cursor { + const char *Ptr; + const char *End; + +public: + Cursor(NoneType) : Ptr(nullptr), End(nullptr) {} + + explicit Cursor(StringRef Str) { + Ptr = Str.data(); + End = Ptr + Str.size(); + } + + bool isEOF() const { return Ptr == End; } + + char peek(int I = 0) const { return End - Ptr <= I ? 0 : Ptr[I]; } + + void advance(unsigned I = 1) { Ptr += I; } + + StringRef remaining() const { return StringRef(Ptr, End - Ptr); } + + StringRef upto(Cursor C) const { + assert(C.Ptr >= Ptr && C.Ptr <= End); + return StringRef(Ptr, C.Ptr - Ptr); + } + + StringRef::iterator location() const { return Ptr; } + + operator bool() const { return Ptr != nullptr; } +}; + +} // end anonymous namespace + +MIToken &MIToken::reset(TokenKind Kind, StringRef Range) { + this->Kind = Kind; + this->Range = Range; + return *this; +} + +MIToken &MIToken::setStringValue(StringRef StrVal) { + StringValue = StrVal; + return *this; +} + +MIToken &MIToken::setOwnedStringValue(std::string StrVal) { + StringValueStorage = std::move(StrVal); + StringValue = StringValueStorage; + return *this; +} + +MIToken &MIToken::setIntegerValue(APSInt IntVal) { + this->IntVal = std::move(IntVal); + return *this; +} + +/// Skip the leading whitespace characters and return the updated cursor. +static Cursor skipWhitespace(Cursor C) { + while (isblank(C.peek())) + C.advance(); + return C; +} + +static bool isNewlineChar(char C) { return C == '\n' || C == '\r'; } + +/// Skip a line comment and return the updated cursor. +static Cursor skipComment(Cursor C) { + if (C.peek() != ';') + return C; + while (!isNewlineChar(C.peek()) && !C.isEOF()) + C.advance(); + return C; +} + +/// Return true if the given character satisfies the following regular +/// expression: [-a-zA-Z$._0-9] +static bool isIdentifierChar(char C) { + return isalpha(C) || isdigit(C) || C == '_' || C == '-' || C == '.' || + C == '$'; +} + +/// Unescapes the given string value. +/// +/// Expects the string value to be quoted. 
+static std::string unescapeQuotedString(StringRef Value) { + assert(Value.front() == '"' && Value.back() == '"'); + Cursor C = Cursor(Value.substr(1, Value.size() - 2)); + + std::string Str; + Str.reserve(C.remaining().size()); + while (!C.isEOF()) { + char Char = C.peek(); + if (Char == '\\') { + if (C.peek(1) == '\\') { + // Two '\' become one + Str += '\\'; + C.advance(2); + continue; + } + if (isxdigit(C.peek(1)) && isxdigit(C.peek(2))) { + Str += hexDigitValue(C.peek(1)) * 16 + hexDigitValue(C.peek(2)); + C.advance(3); + continue; + } + } + Str += Char; + C.advance(); + } + return Str; +} + +/// Lex a string constant using the following regular expression: \"[^\"]*\" +static Cursor lexStringConstant( + Cursor C, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + assert(C.peek() == '"'); + for (C.advance(); C.peek() != '"'; C.advance()) { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '\"'"); + return None; + } + } + C.advance(); + return C; +} + +static Cursor lexName( + Cursor C, MIToken &Token, MIToken::TokenKind Type, unsigned PrefixLength, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + auto Range = C; + C.advance(PrefixLength); + if (C.peek() == '"') { + if (Cursor R = lexStringConstant(C, ErrorCallback)) { + StringRef String = Range.upto(R); + Token.reset(Type, String) + .setOwnedStringValue( + unescapeQuotedString(String.drop_front(PrefixLength))); + return R; + } + Token.reset(MIToken::Error, Range.remaining()); + return Range; + } + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(Type, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(PrefixLength)); + return C; +} + +static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) { + if (C.peek() != 'i' || !isdigit(C.peek(1))) + return None; + auto Range = C; + C.advance(); // Skip 'i' + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::IntegerType, Range.upto(C)); + return C; +} + +static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("_", MIToken::underscore) + .Case("implicit", MIToken::kw_implicit) + .Case("implicit-def", MIToken::kw_implicit_define) + .Case("def", MIToken::kw_def) + .Case("dead", MIToken::kw_dead) + .Case("killed", MIToken::kw_killed) + .Case("undef", MIToken::kw_undef) + .Case("internal", MIToken::kw_internal) + .Case("early-clobber", MIToken::kw_early_clobber) + .Case("debug-use", MIToken::kw_debug_use) + .Case("tied-def", MIToken::kw_tied_def) + .Case("frame-setup", MIToken::kw_frame_setup) + .Case("debug-location", MIToken::kw_debug_location) + .Case(".cfi_same_value", MIToken::kw_cfi_same_value) + .Case(".cfi_offset", MIToken::kw_cfi_offset) + .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register) + .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset) + .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa) + .Case("blockaddress", MIToken::kw_blockaddress) + .Case("target-index", MIToken::kw_target_index) + .Case("half", MIToken::kw_half) + .Case("float", MIToken::kw_float) + .Case("double", MIToken::kw_double) + .Case("x86_fp80", MIToken::kw_x86_fp80) + .Case("fp128", MIToken::kw_fp128) + .Case("ppc_fp128", MIToken::kw_ppc_fp128) + .Case("target-flags", MIToken::kw_target_flags) + .Case("volatile", MIToken::kw_volatile) + .Case("non-temporal", MIToken::kw_non_temporal) + .Case("invariant", MIToken::kw_invariant) + .Case("align", 
MIToken::kw_align) + .Case("stack", MIToken::kw_stack) + .Case("got", MIToken::kw_got) + .Case("jump-table", MIToken::kw_jump_table) + .Case("constant-pool", MIToken::kw_constant_pool) + .Case("call-entry", MIToken::kw_call_entry) + .Case("liveout", MIToken::kw_liveout) + .Case("address-taken", MIToken::kw_address_taken) + .Case("landing-pad", MIToken::kw_landing_pad) + .Case("liveins", MIToken::kw_liveins) + .Case("successors", MIToken::kw_successors) + .Default(MIToken::Identifier); +} + +static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) { + if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.') + return None; + auto Range = C; + while (isIdentifierChar(C.peek())) + C.advance(); + auto Identifier = Range.upto(C); + Token.reset(getIdentifierKind(Identifier), Identifier) + .setStringValue(Identifier); + return C; +} + +static Cursor maybeLexMachineBasicBlock( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + bool IsReference = C.remaining().startswith("%bb."); + if (!IsReference && !C.remaining().startswith("bb.")) + return None; + auto Range = C; + unsigned PrefixLength = IsReference ? 4 : 3; + C.advance(PrefixLength); // Skip '%bb.' or 'bb.' + if (!isdigit(C.peek())) { + Token.reset(MIToken::Error, C.remaining()); + ErrorCallback(C.location(), "expected a number after '%bb.'"); + return C; + } + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = PrefixLength + Number.size(); // Drop '%bb.<id>' + if (C.peek() == '.') { + C.advance(); // Skip '.' + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(IsReference ? MIToken::MachineBasicBlock + : MIToken::MachineBasicBlockLabel, + Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexIndex(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(Kind, Range.upto(C)).setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexIndexAndName(Cursor C, MIToken &Token, StringRef Rule, + MIToken::TokenKind Kind) { + if (!C.remaining().startswith(Rule) || !isdigit(C.peek(Rule.size()))) + return None; + auto Range = C; + C.advance(Rule.size()); + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + StringRef Number = NumberRange.upto(C); + unsigned StringOffset = Rule.size() + Number.size(); + if (C.peek() == '.') { + C.advance(); + ++StringOffset; + while (isIdentifierChar(C.peek())) + C.advance(); + } + Token.reset(Kind, Range.upto(C)) + .setIntegerValue(APSInt(Number)) + .setStringValue(Range.upto(C).drop_front(StringOffset)); + return C; +} + +static Cursor maybeLexJumpTableIndex(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%jump-table.", MIToken::JumpTableIndex); +} + +static Cursor maybeLexStackObject(Cursor C, MIToken &Token) { + return maybeLexIndexAndName(C, Token, "%stack.", MIToken::StackObject); +} + +static Cursor maybeLexFixedStackObject(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%fixed-stack.", MIToken::FixedStackObject); +} + +static Cursor maybeLexConstantPoolItem(Cursor C, MIToken &Token) { + return maybeLexIndex(C, Token, "%const.", 
MIToken::ConstantPoolItem); +} + +static Cursor maybeLexIRBlock( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + const StringRef Rule = "%ir-block."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRBlock); + return lexName(C, Token, MIToken::NamedIRBlock, Rule.size(), ErrorCallback); +} + +static Cursor maybeLexIRValue( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + const StringRef Rule = "%ir."; + if (!C.remaining().startswith(Rule)) + return None; + if (isdigit(C.peek(Rule.size()))) + return maybeLexIndex(C, Token, Rule, MIToken::IRValue); + return lexName(C, Token, MIToken::NamedIRValue, Rule.size(), ErrorCallback); +} + +static Cursor lexVirtualRegister(Cursor C, MIToken &Token) { + auto Range = C; + C.advance(); // Skip '%' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::VirtualRegister, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexRegister(Cursor C, MIToken &Token) { + if (C.peek() != '%') + return None; + if (isdigit(C.peek(1))) + return lexVirtualRegister(C, Token); + auto Range = C; + C.advance(); // Skip '%' + while (isIdentifierChar(C.peek())) + C.advance(); + Token.reset(MIToken::NamedRegister, Range.upto(C)) + .setStringValue(Range.upto(C).drop_front(1)); // Drop the '%' + return C; +} + +static Cursor maybeLexGlobalValue( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '@') + return None; + if (!isdigit(C.peek(1))) + return lexName(C, Token, MIToken::NamedGlobalValue, /*PrefixLength=*/1, + ErrorCallback); + auto Range = C; + C.advance(1); // Skip the '@' + auto NumberRange = C; + while (isdigit(C.peek())) + C.advance(); + Token.reset(MIToken::GlobalValue, Range.upto(C)) + .setIntegerValue(APSInt(NumberRange.upto(C))); + return C; +} + +static Cursor maybeLexExternalSymbol( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '$') + return None; + return lexName(C, Token, MIToken::ExternalSymbol, /*PrefixLength=*/1, + ErrorCallback); +} + +static bool isValidHexFloatingPointPrefix(char C) { + return C == 'H' || C == 'K' || C == 'L' || C == 'M'; +} + +static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) { + if (C.peek() != '0' || C.peek(1) != 'x') + return None; + Cursor Range = C; + C.advance(2); // Skip '0x' + if (isValidHexFloatingPointPrefix(C.peek())) + C.advance(); + while (isxdigit(C.peek())) + C.advance(); + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) { + C.advance(); + // Skip over [0-9]*([eE][-+]?[0-9]+)? 
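+  // (The integer part and the '.' were already consumed by
+  // maybeLexNumericalLiteral, so only the fractional digits and an optional
+  // exponent remain, e.g. the "25e-4" of "3.25e-4".)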
+ while (isdigit(C.peek())) + C.advance(); + if ((C.peek() == 'e' || C.peek() == 'E') && + (isdigit(C.peek(1)) || + ((C.peek(1) == '-' || C.peek(1) == '+') && isdigit(C.peek(2))))) { + C.advance(2); + while (isdigit(C.peek())) + C.advance(); + } + Token.reset(MIToken::FloatingPointLiteral, Range.upto(C)); + return C; +} + +static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) { + if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1)))) + return None; + auto Range = C; + C.advance(); + while (isdigit(C.peek())) + C.advance(); + if (C.peek() == '.') + return lexFloatingPointLiteral(Range, C, Token); + StringRef StrVal = Range.upto(C); + Token.reset(MIToken::IntegerLiteral, StrVal).setIntegerValue(APSInt(StrVal)); + return C; +} + +static MIToken::TokenKind getMetadataKeywordKind(StringRef Identifier) { + return StringSwitch<MIToken::TokenKind>(Identifier) + .Case("!tbaa", MIToken::md_tbaa) + .Case("!alias.scope", MIToken::md_alias_scope) + .Case("!noalias", MIToken::md_noalias) + .Case("!range", MIToken::md_range) + .Default(MIToken::Error); +} + +static Cursor maybeLexExlaim( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '!') + return None; + auto Range = C; + C.advance(1); + if (isdigit(C.peek()) || !isIdentifierChar(C.peek())) { + Token.reset(MIToken::exclaim, Range.upto(C)); + return C; + } + while (isIdentifierChar(C.peek())) + C.advance(); + StringRef StrVal = Range.upto(C); + Token.reset(getMetadataKeywordKind(StrVal), StrVal); + if (Token.isError()) + ErrorCallback(Token.location(), + "use of unknown metadata keyword '" + StrVal + "'"); + return C; +} + +static MIToken::TokenKind symbolToken(char C) { + switch (C) { + case ',': + return MIToken::comma; + case '=': + return MIToken::equal; + case ':': + return MIToken::colon; + case '(': + return MIToken::lparen; + case ')': + return MIToken::rparen; + case '{': + return MIToken::lbrace; + case '}': + return MIToken::rbrace; + case '+': + return MIToken::plus; + case '-': + return MIToken::minus; + default: + return MIToken::Error; + } +} + +static Cursor maybeLexSymbol(Cursor C, MIToken &Token) { + MIToken::TokenKind Kind; + unsigned Length = 1; + if (C.peek() == ':' && C.peek(1) == ':') { + Kind = MIToken::coloncolon; + Length = 2; + } else + Kind = symbolToken(C.peek()); + if (Kind == MIToken::Error) + return None; + auto Range = C; + C.advance(Length); + Token.reset(Kind, Range.upto(C)); + return C; +} + +static Cursor maybeLexNewline(Cursor C, MIToken &Token) { + if (!isNewlineChar(C.peek())) + return None; + auto Range = C; + C.advance(); + Token.reset(MIToken::Newline, Range.upto(C)); + return C; +} + +static Cursor maybeLexEscapedIRValue( + Cursor C, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + if (C.peek() != '`') + return None; + auto Range = C; + C.advance(); + auto StrRange = C; + while (C.peek() != '`') { + if (C.isEOF() || isNewlineChar(C.peek())) { + ErrorCallback( + C.location(), + "end of machine instruction reached before the closing '`'"); + Token.reset(MIToken::Error, Range.remaining()); + return C; + } + C.advance(); + } + StringRef Value = StrRange.upto(C); + C.advance(); + Token.reset(MIToken::QuotedIRValue, Range.upto(C)).setStringValue(Value); + return C; +} + +StringRef llvm::lexMIToken( + StringRef Source, MIToken &Token, + function_ref<void(StringRef::iterator Loc, const Twine &)> ErrorCallback) { + auto C = skipComment(skipWhitespace(Cursor(Source))); + if 
(C.isEOF()) { + Token.reset(MIToken::Eof, C.remaining()); + return C.remaining(); + } + + if (Cursor R = maybeLexIntegerType(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIdentifier(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexJumpTableIndex(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexFixedStackObject(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexConstantPoolItem(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexIRBlock(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexIRValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexRegister(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexGlobalValue(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNumericalLiteral(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexExlaim(C, Token, ErrorCallback)) + return R.remaining(); + if (Cursor R = maybeLexSymbol(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexNewline(C, Token)) + return R.remaining(); + if (Cursor R = maybeLexEscapedIRValue(C, Token, ErrorCallback)) + return R.remaining(); + + Token.reset(MIToken::Error, C.remaining()); + ErrorCallback(C.location(), + Twine("unexpected character '") + Twine(C.peek()) + "'"); + return C.remaining(); +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h new file mode 100644 index 0000000..ff54aa3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -0,0 +1,193 @@ +//===- MILexer.h - Lexer for machine instructions -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the function that lexes the machine instruction source +// string. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H +#define LLVM_LIB_CODEGEN_MIRPARSER_MILEXER_H + +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/STLExtras.h" +#include <functional> + +namespace llvm { + +class Twine; + +/// A token produced by the machine instruction lexer. +struct MIToken { + enum TokenKind { + // Markers + Eof, + Error, + Newline, + + // Tokens with no info. 
+ comma, + equal, + underscore, + colon, + coloncolon, + exclaim, + lparen, + rparen, + lbrace, + rbrace, + plus, + minus, + + // Keywords + kw_implicit, + kw_implicit_define, + kw_def, + kw_dead, + kw_killed, + kw_undef, + kw_internal, + kw_early_clobber, + kw_debug_use, + kw_tied_def, + kw_frame_setup, + kw_debug_location, + kw_cfi_same_value, + kw_cfi_offset, + kw_cfi_def_cfa_register, + kw_cfi_def_cfa_offset, + kw_cfi_def_cfa, + kw_blockaddress, + kw_target_index, + kw_half, + kw_float, + kw_double, + kw_x86_fp80, + kw_fp128, + kw_ppc_fp128, + kw_target_flags, + kw_volatile, + kw_non_temporal, + kw_invariant, + kw_align, + kw_stack, + kw_got, + kw_jump_table, + kw_constant_pool, + kw_call_entry, + kw_liveout, + kw_address_taken, + kw_landing_pad, + kw_liveins, + kw_successors, + + // Named metadata keywords + md_tbaa, + md_alias_scope, + md_noalias, + md_range, + + // Identifier tokens + Identifier, + IntegerType, + NamedRegister, + MachineBasicBlockLabel, + MachineBasicBlock, + StackObject, + FixedStackObject, + NamedGlobalValue, + GlobalValue, + ExternalSymbol, + + // Other tokens + IntegerLiteral, + FloatingPointLiteral, + VirtualRegister, + ConstantPoolItem, + JumpTableIndex, + NamedIRBlock, + IRBlock, + NamedIRValue, + IRValue, + QuotedIRValue // `<constant value>` + }; + +private: + TokenKind Kind; + StringRef Range; + StringRef StringValue; + std::string StringValueStorage; + APSInt IntVal; + +public: + MIToken() : Kind(Error) {} + + MIToken &reset(TokenKind Kind, StringRef Range); + + MIToken &setStringValue(StringRef StrVal); + MIToken &setOwnedStringValue(std::string StrVal); + MIToken &setIntegerValue(APSInt IntVal); + + TokenKind kind() const { return Kind; } + + bool isError() const { return Kind == Error; } + + bool isNewlineOrEOF() const { return Kind == Newline || Kind == Eof; } + + bool isErrorOrEOF() const { return Kind == Error || Kind == Eof; } + + bool isRegister() const { + return Kind == NamedRegister || Kind == underscore || + Kind == VirtualRegister; + } + + bool isRegisterFlag() const { + return Kind == kw_implicit || Kind == kw_implicit_define || + Kind == kw_def || Kind == kw_dead || Kind == kw_killed || + Kind == kw_undef || Kind == kw_internal || + Kind == kw_early_clobber || Kind == kw_debug_use; + } + + bool isMemoryOperandFlag() const { + return Kind == kw_volatile || Kind == kw_non_temporal || + Kind == kw_invariant; + } + + bool is(TokenKind K) const { return Kind == K; } + + bool isNot(TokenKind K) const { return Kind != K; } + + StringRef::iterator location() const { return Range.begin(); } + + StringRef range() const { return Range; } + + /// Return the token's string value. + StringRef stringValue() const { return StringValue; } + + const APSInt &integerValue() const { return IntVal; } + + bool hasIntegerValue() const { + return Kind == IntegerLiteral || Kind == MachineBasicBlock || + Kind == MachineBasicBlockLabel || Kind == StackObject || + Kind == FixedStackObject || Kind == GlobalValue || + Kind == VirtualRegister || Kind == ConstantPoolItem || + Kind == JumpTableIndex || Kind == IRBlock || Kind == IRValue; + } +}; + +/// Consume a single machine instruction token in the given source and return +/// the remaining source string. 
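+///
+/// A typical caller re-lexes the returned remainder in a loop; roughly
+/// (illustrative sketch):
+///   MIToken Tok;
+///   do
+///     Source = lexMIToken(Source, Tok, ErrorCallback);
+///   while (!Tok.isErrorOrEOF());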
+StringRef lexMIToken( + StringRef Source, MIToken &Token, + function_ref<void(StringRef::iterator, const Twine &)> ErrorCallback); + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp new file mode 100644 index 0000000..f2f6584 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp @@ -0,0 +1,2011 @@ +//===- MIParser.cpp - Machine instructions parser implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the parsing of machine instructions. +// +//===----------------------------------------------------------------------===// + +#include "MIParser.h" +#include "MILexer.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +namespace { + +/// A wrapper struct around the 'MachineOperand' struct that includes a source +/// range and other attributes. +struct ParsedMachineOperand { + MachineOperand Operand; + StringRef::iterator Begin; + StringRef::iterator End; + Optional<unsigned> TiedDefIdx; + + ParsedMachineOperand(const MachineOperand &Operand, StringRef::iterator Begin, + StringRef::iterator End, Optional<unsigned> &TiedDefIdx) + : Operand(Operand), Begin(Begin), End(End), TiedDefIdx(TiedDefIdx) { + if (TiedDefIdx) + assert(Operand.isReg() && Operand.isUse() && + "Only used register operands can be tied"); + } +}; + +class MIParser { + SourceMgr &SM; + MachineFunction &MF; + SMDiagnostic &Error; + StringRef Source, CurrentSource; + MIToken Token; + const PerFunctionMIParsingState &PFS; + /// Maps from indices to unnamed global values and metadata nodes. + const SlotMapping &IRSlots; + /// Maps from instruction names to op codes. + StringMap<unsigned> Names2InstrOpCodes; + /// Maps from register names to registers. + StringMap<unsigned> Names2Regs; + /// Maps from register mask names to register masks. + StringMap<const uint32_t *> Names2RegMasks; + /// Maps from subregister names to subregister indices. + StringMap<unsigned> Names2SubRegIndices; + /// Maps from slot numbers to function's unnamed basic blocks. + DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks; + /// Maps from slot numbers to function's unnamed values. + DenseMap<unsigned, const Value *> Slots2Values; + /// Maps from target index names to target indices. + StringMap<int> Names2TargetIndices; + /// Maps from direct target flag names to the direct target flag values. + StringMap<unsigned> Names2DirectTargetFlags; + /// Maps from direct target flag names to the bitmask target flag values. 
+ StringMap<unsigned> Names2BitmaskTargetFlags; + +public: + MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, + StringRef Source, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots); + + void lex(); + + /// Report an error at the current location with the given message. + /// + /// This function always return true. + bool error(const Twine &Msg); + + /// Report an error at the given location with the given message. + /// + /// This function always return true. + bool error(StringRef::iterator Loc, const Twine &Msg); + + bool + parseBasicBlockDefinitions(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlocks(); + bool parse(MachineInstr *&MI); + bool parseStandaloneMBB(MachineBasicBlock *&MBB); + bool parseStandaloneNamedRegister(unsigned &Reg); + bool parseStandaloneVirtualRegister(unsigned &Reg); + bool parseStandaloneStackObject(int &FI); + bool parseStandaloneMDNode(MDNode *&Node); + + bool + parseBasicBlockDefinition(DenseMap<unsigned, MachineBasicBlock *> &MBBSlots); + bool parseBasicBlock(MachineBasicBlock &MBB); + bool parseBasicBlockLiveins(MachineBasicBlock &MBB); + bool parseBasicBlockSuccessors(MachineBasicBlock &MBB); + + bool parseRegister(unsigned &Reg); + bool parseRegisterFlag(unsigned &Flags); + bool parseSubRegisterIndex(unsigned &SubReg); + bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx); + bool parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, bool IsDef = false); + bool parseImmediateOperand(MachineOperand &Dest); + bool parseIRConstant(StringRef::iterator Loc, StringRef Source, + const Constant *&C); + bool parseIRConstant(StringRef::iterator Loc, const Constant *&C); + bool parseTypedImmediateOperand(MachineOperand &Dest); + bool parseFPImmediateOperand(MachineOperand &Dest); + bool parseMBBReference(MachineBasicBlock *&MBB); + bool parseMBBOperand(MachineOperand &Dest); + bool parseStackFrameIndex(int &FI); + bool parseStackObjectOperand(MachineOperand &Dest); + bool parseFixedStackFrameIndex(int &FI); + bool parseFixedStackObjectOperand(MachineOperand &Dest); + bool parseGlobalValue(GlobalValue *&GV); + bool parseGlobalAddressOperand(MachineOperand &Dest); + bool parseConstantPoolIndexOperand(MachineOperand &Dest); + bool parseJumpTableIndexOperand(MachineOperand &Dest); + bool parseExternalSymbolOperand(MachineOperand &Dest); + bool parseMDNode(MDNode *&Node); + bool parseMetadataOperand(MachineOperand &Dest); + bool parseCFIOffset(int &Offset); + bool parseCFIRegister(unsigned &Reg); + bool parseCFIOperand(MachineOperand &Dest); + bool parseIRBlock(BasicBlock *&BB, const Function &F); + bool parseBlockAddressOperand(MachineOperand &Dest); + bool parseTargetIndexOperand(MachineOperand &Dest); + bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest); + bool parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseMachineOperandAndTargetFlags(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx); + bool parseOffset(int64_t &Offset); + bool parseAlignment(unsigned &Alignment); + bool parseOperandsOffset(MachineOperand &Op); + bool parseIRValue(const Value *&V); + bool parseMemoryOperandFlag(unsigned &Flags); + bool parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV); + bool parseMachinePointerInfo(MachinePointerInfo &Dest); + bool parseMachineMemoryOperand(MachineMemOperand *&Dest); + +private: + /// Convert the integer literal in the current token into an unsigned integer. + /// + /// Return true if an error occurred. 
+ bool getUnsigned(unsigned &Result); + + /// Convert the integer literal in the current token into an uint64. + /// + /// Return true if an error occurred. + bool getUint64(uint64_t &Result); + + /// If the current token is of the given kind, consume it and return false. + /// Otherwise report an error and return true. + bool expectAndConsume(MIToken::TokenKind TokenKind); + + /// If the current token is of the given kind, consume it and return true. + /// Otherwise return false. + bool consumeIfPresent(MIToken::TokenKind TokenKind); + + void initNames2InstrOpCodes(); + + /// Try to convert an instruction name to an opcode. Return true if the + /// instruction name is invalid. + bool parseInstrName(StringRef InstrName, unsigned &OpCode); + + bool parseInstruction(unsigned &OpCode, unsigned &Flags); + + bool assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands); + + bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, + const MCInstrDesc &MCID); + + void initNames2Regs(); + + /// Try to convert a register name to a register number. Return true if the + /// register name is invalid. + bool getRegisterByName(StringRef RegName, unsigned &Reg); + + void initNames2RegMasks(); + + /// Check if the given identifier is a name of a register mask. + /// + /// Return null if the identifier isn't a register mask. + const uint32_t *getRegMask(StringRef Identifier); + + void initNames2SubRegIndices(); + + /// Check if the given identifier is a name of a subregister index. + /// + /// Return 0 if the name isn't a subregister index class. + unsigned getSubRegIndex(StringRef Name); + + const BasicBlock *getIRBlock(unsigned Slot); + const BasicBlock *getIRBlock(unsigned Slot, const Function &F); + + const Value *getIRValue(unsigned Slot); + + void initNames2TargetIndices(); + + /// Try to convert a name of target index to the corresponding target index. + /// + /// Return true if the name isn't a name of a target index. + bool getTargetIndex(StringRef Name, int &Index); + + void initNames2DirectTargetFlags(); + + /// Try to convert a name of a direct target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a direct flag. + bool getDirectTargetFlag(StringRef Name, unsigned &Flag); + + void initNames2BitmaskTargetFlags(); + + /// Try to convert a name of a bitmask target flag to the corresponding + /// target flag. + /// + /// Return true if the name isn't a name of a bitmask target flag. + bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag); +}; + +} // end anonymous namespace + +MIParser::MIParser(SourceMgr &SM, MachineFunction &MF, SMDiagnostic &Error, + StringRef Source, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots) + : SM(SM), MF(MF), Error(Error), Source(Source), CurrentSource(Source), + PFS(PFS), IRSlots(IRSlots) {} + +void MIParser::lex() { + CurrentSource = lexMIToken( + CurrentSource, Token, + [this](StringRef::iterator Loc, const Twine &Msg) { error(Loc, Msg); }); +} + +bool MIParser::error(const Twine &Msg) { return error(Token.location(), Msg); } + +bool MIParser::error(StringRef::iterator Loc, const Twine &Msg) { + assert(Loc >= Source.data() && Loc <= (Source.data() + Source.size())); + const MemoryBuffer &Buffer = *SM.getMemoryBuffer(SM.getMainFileID()); + if (Loc >= Buffer.getBufferStart() && Loc <= Buffer.getBufferEnd()) { + // Create an ordinary diagnostic when the source manager's buffer is the + // source string. 
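+    // (Loc points into the main buffer, so SourceMgr can compute the line and
+    // column itself; the branch below instead builds an SMDiagnostic by hand,
+    // using Loc - Source.data() as the column.)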
+ Error = SM.GetMessage(SMLoc::getFromPointer(Loc), SourceMgr::DK_Error, Msg); + return true; + } + // Create a diagnostic for a YAML string literal. + Error = SMDiagnostic(SM, SMLoc(), Buffer.getBufferIdentifier(), 1, + Loc - Source.data(), SourceMgr::DK_Error, Msg.str(), + Source, None, None); + return true; +} + +static const char *toString(MIToken::TokenKind TokenKind) { + switch (TokenKind) { + case MIToken::comma: + return "','"; + case MIToken::equal: + return "'='"; + case MIToken::colon: + return "':'"; + case MIToken::lparen: + return "'('"; + case MIToken::rparen: + return "')'"; + default: + return "<unknown token>"; + } +} + +bool MIParser::expectAndConsume(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return error(Twine("expected ") + toString(TokenKind)); + lex(); + return false; +} + +bool MIParser::consumeIfPresent(MIToken::TokenKind TokenKind) { + if (Token.isNot(TokenKind)) + return false; + lex(); + return true; +} + +bool MIParser::parseBasicBlockDefinition( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + assert(Token.is(MIToken::MachineBasicBlockLabel)); + unsigned ID = 0; + if (getUnsigned(ID)) + return true; + auto Loc = Token.location(); + auto Name = Token.stringValue(); + lex(); + bool HasAddressTaken = false; + bool IsLandingPad = false; + unsigned Alignment = 0; + BasicBlock *BB = nullptr; + if (consumeIfPresent(MIToken::lparen)) { + do { + // TODO: Report an error when multiple same attributes are specified. + switch (Token.kind()) { + case MIToken::kw_address_taken: + HasAddressTaken = true; + lex(); + break; + case MIToken::kw_landing_pad: + IsLandingPad = true; + lex(); + break; + case MIToken::kw_align: + if (parseAlignment(Alignment)) + return true; + break; + case MIToken::IRBlock: + // TODO: Report an error when both name and ir block are specified. + if (parseIRBlock(BB, *MF.getFunction())) + return true; + lex(); + break; + default: + break; + } + } while (consumeIfPresent(MIToken::comma)); + if (expectAndConsume(MIToken::rparen)) + return true; + } + if (expectAndConsume(MIToken::colon)) + return true; + + if (!Name.empty()) { + BB = dyn_cast_or_null<BasicBlock>( + MF.getFunction()->getValueSymbolTable().lookup(Name)); + if (!BB) + return error(Loc, Twine("basic block '") + Name + + "' is not defined in the function '" + + MF.getName() + "'"); + } + auto *MBB = MF.CreateMachineBasicBlock(BB); + MF.insert(MF.end(), MBB); + bool WasInserted = MBBSlots.insert(std::make_pair(ID, MBB)).second; + if (!WasInserted) + return error(Loc, Twine("redefinition of machine basic block with id #") + + Twine(ID)); + if (Alignment) + MBB->setAlignment(Alignment); + if (HasAddressTaken) + MBB->setHasAddressTaken(); + MBB->setIsEHPad(IsLandingPad); + return false; +} + +bool MIParser::parseBasicBlockDefinitions( + DenseMap<unsigned, MachineBasicBlock *> &MBBSlots) { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + if (Token.isNot(MIToken::MachineBasicBlockLabel)) + return error("expected a basic block definition before instructions"); + unsigned BraceDepth = 0; + do { + if (parseBasicBlockDefinition(MBBSlots)) + return true; + bool IsAfterNewline = false; + // Skip until the next machine basic block. 
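+    // (A basic block label only terminates the skip when it follows a
+    // newline; '{' and '}' are counted so that unbalanced bundle braces can
+    // be diagnosed.)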
+ while (true) { + if ((Token.is(MIToken::MachineBasicBlockLabel) && IsAfterNewline) || + Token.isErrorOrEOF()) + break; + else if (Token.is(MIToken::MachineBasicBlockLabel)) + return error("basic block definition should be located at the start of " + "the line"); + else if (consumeIfPresent(MIToken::Newline)) { + IsAfterNewline = true; + continue; + } + IsAfterNewline = false; + if (Token.is(MIToken::lbrace)) + ++BraceDepth; + if (Token.is(MIToken::rbrace)) { + if (!BraceDepth) + return error("extraneous closing brace ('}')"); + --BraceDepth; + } + lex(); + } + // Verify that we closed all of the '{' at the end of a file or a block. + if (!Token.isError() && BraceDepth) + return error("expected '}'"); // FIXME: Report a note that shows '{'. + } while (!Token.isErrorOrEOF()); + return Token.isError(); +} + +bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_liveins)); + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of liveins. + return false; + do { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg = 0; + if (parseRegister(Reg)) + return true; + MBB.addLiveIn(Reg); + lex(); + } while (consumeIfPresent(MIToken::comma)); + return false; +} + +bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) { + assert(Token.is(MIToken::kw_successors)); + lex(); + if (expectAndConsume(MIToken::colon)) + return true; + if (Token.isNewlineOrEOF()) // Allow an empty list of successors. + return false; + do { + if (Token.isNot(MIToken::MachineBasicBlock)) + return error("expected a machine basic block reference"); + MachineBasicBlock *SuccMBB = nullptr; + if (parseMBBReference(SuccMBB)) + return true; + lex(); + unsigned Weight = 0; + if (consumeIfPresent(MIToken::lparen)) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after '('"); + if (getUnsigned(Weight)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + } + MBB.addSuccessor(SuccMBB, BranchProbability::getRaw(Weight)); + } while (consumeIfPresent(MIToken::comma)); + MBB.normalizeSuccProbs(); + return false; +} + +bool MIParser::parseBasicBlock(MachineBasicBlock &MBB) { + // Skip the definition. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + lex(); + if (consumeIfPresent(MIToken::lparen)) { + while (Token.isNot(MIToken::rparen) && !Token.isErrorOrEOF()) + lex(); + consumeIfPresent(MIToken::rparen); + } + consumeIfPresent(MIToken::colon); + + // Parse the liveins and successors. + // N.B: Multiple lists of successors and liveins are allowed and they're + // merged into one. + // Example: + // liveins: %edi + // liveins: %esi + // + // is equivalent to + // liveins: %edi, %esi + while (true) { + if (Token.is(MIToken::kw_successors)) { + if (parseBasicBlockSuccessors(MBB)) + return true; + } else if (Token.is(MIToken::kw_liveins)) { + if (parseBasicBlockLiveins(MBB)) + return true; + } else if (consumeIfPresent(MIToken::Newline)) { + continue; + } else + break; + if (!Token.isNewlineOrEOF()) + return error("expected line break at the end of a list"); + lex(); + } + + // Parse the instructions. 
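+  // An instruction followed by '{' starts an instruction bundle: the header
+  // is flagged BundledSucc, every bundled instruction is chained to its
+  // predecessor via BundledPred/BundledSucc, and the matching '}' (verified
+  // by the first pass) ends the bundle.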
+ bool IsInBundle = false; + MachineInstr *PrevMI = nullptr; + while (true) { + if (Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)) + return false; + else if (consumeIfPresent(MIToken::Newline)) + continue; + if (consumeIfPresent(MIToken::rbrace)) { + // The first parsing pass should verify that all closing '}' have an + // opening '{'. + assert(IsInBundle); + IsInBundle = false; + continue; + } + MachineInstr *MI = nullptr; + if (parse(MI)) + return true; + MBB.insert(MBB.end(), MI); + if (IsInBundle) { + PrevMI->setFlag(MachineInstr::BundledSucc); + MI->setFlag(MachineInstr::BundledPred); + } + PrevMI = MI; + if (Token.is(MIToken::lbrace)) { + if (IsInBundle) + return error("nested instruction bundles are not allowed"); + lex(); + // This instruction is the start of the bundle. + MI->setFlag(MachineInstr::BundledSucc); + IsInBundle = true; + if (!Token.is(MIToken::Newline)) + // The next instruction can be on the same line. + continue; + } + assert(Token.isNewlineOrEOF() && "MI is not fully parsed"); + lex(); + } + return false; +} + +bool MIParser::parseBasicBlocks() { + lex(); + // Skip until the first machine basic block. + while (Token.is(MIToken::Newline)) + lex(); + if (Token.isErrorOrEOF()) + return Token.isError(); + // The first parsing pass should have verified that this token is a MBB label + // in the 'parseBasicBlockDefinitions' method. + assert(Token.is(MIToken::MachineBasicBlockLabel)); + do { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB)) + return true; + if (parseBasicBlock(*MBB)) + return true; + // The method 'parseBasicBlock' should parse the whole block until the next + // block or the end of file. + assert(Token.is(MIToken::MachineBasicBlockLabel) || Token.is(MIToken::Eof)); + } while (Token.isNot(MIToken::Eof)); + return false; +} + +bool MIParser::parse(MachineInstr *&MI) { + // Parse any register operands before '=' + MachineOperand MO = MachineOperand::CreateImm(0); + SmallVector<ParsedMachineOperand, 8> Operands; + while (Token.isRegister() || Token.isRegisterFlag()) { + auto Loc = Token.location(); + Optional<unsigned> TiedDefIdx; + if (parseRegisterOperand(MO, TiedDefIdx, /*IsDef=*/true)) + return true; + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNot(MIToken::comma)) + break; + lex(); + } + if (!Operands.empty() && expectAndConsume(MIToken::equal)) + return true; + + unsigned OpCode, Flags = 0; + if (Token.isError() || parseInstruction(OpCode, Flags)) + return true; + + // Parse the remaining machine operands. + while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) && + Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) { + auto Loc = Token.location(); + Optional<unsigned> TiedDefIdx; + if (parseMachineOperandAndTargetFlags(MO, TiedDefIdx)) + return true; + Operands.push_back( + ParsedMachineOperand(MO, Loc, Token.location(), TiedDefIdx)); + if (Token.isNewlineOrEOF() || Token.is(MIToken::coloncolon) || + Token.is(MIToken::lbrace)) + break; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine operand"); + lex(); + } + + DebugLoc DebugLocation; + if (Token.is(MIToken::kw_debug_location)) { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node after 'debug-location'"); + MDNode *Node = nullptr; + if (parseMDNode(Node)) + return true; + DebugLocation = DebugLoc(Node); + } + + // Parse the machine memory operands. 
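+  // They are introduced by '::' and separated by commas until the end of the
+  // line, e.g. something like ':: (load 4 from %ir.p)' (illustrative MIR
+  // syntax, not taken from this file).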
+ SmallVector<MachineMemOperand *, 2> MemOperands; + if (Token.is(MIToken::coloncolon)) { + lex(); + while (!Token.isNewlineOrEOF()) { + MachineMemOperand *MemOp = nullptr; + if (parseMachineMemoryOperand(MemOp)) + return true; + MemOperands.push_back(MemOp); + if (Token.isNewlineOrEOF()) + break; + if (Token.isNot(MIToken::comma)) + return error("expected ',' before the next machine memory operand"); + lex(); + } + } + + const auto &MCID = MF.getSubtarget().getInstrInfo()->get(OpCode); + if (!MCID.isVariadic()) { + // FIXME: Move the implicit operand verification to the machine verifier. + if (verifyImplicitOperands(Operands, MCID)) + return true; + } + + // TODO: Check for extraneous machine operands. + MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true); + MI->setFlags(Flags); + for (const auto &Operand : Operands) + MI->addOperand(MF, Operand.Operand); + if (assignRegisterTies(*MI, Operands)) + return true; + if (MemOperands.empty()) + return false; + MachineInstr::mmo_iterator MemRefs = + MF.allocateMemRefsArray(MemOperands.size()); + std::copy(MemOperands.begin(), MemOperands.end(), MemRefs); + MI->setMemRefs(MemRefs, MemRefs + MemOperands.size()); + return false; +} + +bool MIParser::parseStandaloneMBB(MachineBasicBlock *&MBB) { + lex(); + if (Token.isNot(MIToken::MachineBasicBlock)) + return error("expected a machine basic block reference"); + if (parseMBBReference(MBB)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error( + "expected end of string after the machine basic block reference"); + return false; +} + +bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + if (parseRegister(Reg)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) { + lex(); + if (Token.isNot(MIToken::VirtualRegister)) + return error("expected a virtual register"); + if (parseRegister(Reg)) + return true; + lex(); + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the register reference"); + return false; +} + +bool MIParser::parseStandaloneStackObject(int &FI) { + lex(); + if (Token.isNot(MIToken::StackObject)) + return error("expected a stack object"); + if (parseStackFrameIndex(FI)) + return true; + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the stack object reference"); + return false; +} + +bool MIParser::parseStandaloneMDNode(MDNode *&Node) { + lex(); + if (Token.isNot(MIToken::exclaim)) + return error("expected a metadata node"); + if (parseMDNode(Node)) + return true; + if (Token.isNot(MIToken::Eof)) + return error("expected end of string after the metadata node"); + return false; +} + +static const char *printImplicitRegisterFlag(const MachineOperand &MO) { + assert(MO.isImplicit()); + return MO.isDef() ? "implicit-def" : "implicit"; +} + +static std::string getRegisterName(const TargetRegisterInfo *TRI, + unsigned Reg) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg"); + return StringRef(TRI->getName(Reg)).lower(); +} + +/// Return true if the parsed machine operands contain a given machine operand. 
+static bool isImplicitOperandIn(const MachineOperand &ImplicitOperand, + ArrayRef<ParsedMachineOperand> Operands) { + for (const auto &I : Operands) { + if (ImplicitOperand.isIdenticalTo(I.Operand)) + return true; + } + return false; +} + +bool MIParser::verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands, + const MCInstrDesc &MCID) { + if (MCID.isCall()) + // We can't verify call instructions as they can contain arbitrary implicit + // register and register mask operands. + return false; + + // Gather all the expected implicit operands. + SmallVector<MachineOperand, 4> ImplicitOperands; + if (MCID.ImplicitDefs) + for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpDefs, true, true)); + if (MCID.ImplicitUses) + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + ImplicitOperands.push_back( + MachineOperand::CreateReg(*ImpUses, false, true)); + + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (const auto &I : ImplicitOperands) { + if (isImplicitOperandIn(I, Operands)) + continue; + return error(Operands.empty() ? Token.location() : Operands.back().End, + Twine("missing implicit register operand '") + + printImplicitRegisterFlag(I) + " %" + + getRegisterName(TRI, I.getReg()) + "'"); + } + return false; +} + +bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) { + if (Token.is(MIToken::kw_frame_setup)) { + Flags |= MachineInstr::FrameSetup; + lex(); + } + if (Token.isNot(MIToken::Identifier)) + return error("expected a machine instruction"); + StringRef InstrName = Token.stringValue(); + if (parseInstrName(InstrName, OpCode)) + return error(Twine("unknown machine instruction name '") + InstrName + "'"); + lex(); + return false; +} + +bool MIParser::parseRegister(unsigned &Reg) { + switch (Token.kind()) { + case MIToken::underscore: + Reg = 0; + break; + case MIToken::NamedRegister: { + StringRef Name = Token.stringValue(); + if (getRegisterByName(Name, Reg)) + return error(Twine("unknown register name '") + Name + "'"); + break; + } + case MIToken::VirtualRegister: { + unsigned ID; + if (getUnsigned(ID)) + return true; + const auto RegInfo = PFS.VirtualRegisterSlots.find(ID); + if (RegInfo == PFS.VirtualRegisterSlots.end()) + return error(Twine("use of undefined virtual register '%") + Twine(ID) + + "'"); + Reg = RegInfo->second; + break; + } + // TODO: Parse other register kinds. 
+ default: + llvm_unreachable("The current token should be a register"); + } + return false; +} + +bool MIParser::parseRegisterFlag(unsigned &Flags) { + const unsigned OldFlags = Flags; + switch (Token.kind()) { + case MIToken::kw_implicit: + Flags |= RegState::Implicit; + break; + case MIToken::kw_implicit_define: + Flags |= RegState::ImplicitDefine; + break; + case MIToken::kw_def: + Flags |= RegState::Define; + break; + case MIToken::kw_dead: + Flags |= RegState::Dead; + break; + case MIToken::kw_killed: + Flags |= RegState::Kill; + break; + case MIToken::kw_undef: + Flags |= RegState::Undef; + break; + case MIToken::kw_internal: + Flags |= RegState::InternalRead; + break; + case MIToken::kw_early_clobber: + Flags |= RegState::EarlyClobber; + break; + case MIToken::kw_debug_use: + Flags |= RegState::Debug; + break; + default: + llvm_unreachable("The current token should be a register flag"); + } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. + return error("duplicate '" + Token.stringValue() + "' register flag"); + lex(); + return false; +} + +bool MIParser::parseSubRegisterIndex(unsigned &SubReg) { + assert(Token.is(MIToken::colon)); + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected a subregister index after ':'"); + auto Name = Token.stringValue(); + SubReg = getSubRegIndex(Name); + if (!SubReg) + return error(Twine("use of unknown subregister index '") + Name + "'"); + lex(); + return false; +} + +bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) { + if (!consumeIfPresent(MIToken::kw_tied_def)) + return error("expected 'tied-def' after '('"); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after 'tied-def'"); + if (getUnsigned(TiedDefIdx)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + return false; +} + +bool MIParser::assignRegisterTies(MachineInstr &MI, + ArrayRef<ParsedMachineOperand> Operands) { + SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs; + for (unsigned I = 0, E = Operands.size(); I != E; ++I) { + if (!Operands[I].TiedDefIdx) + continue; + // The parser ensures that this operand is a register use, so we just have + // to check the tied-def operand. + unsigned DefIdx = Operands[I].TiedDefIdx.getValue(); + if (DefIdx >= E) + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '" + + Twine(DefIdx) + "'; instruction has only ") + + Twine(E) + " operands"); + const auto &DefOperand = Operands[DefIdx].Operand; + if (!DefOperand.isReg() || !DefOperand.isDef()) + // FIXME: add note with the def operand. + return error(Operands[I].Begin, + Twine("use of invalid tied-def operand index '") + + Twine(DefIdx) + "'; the operand #" + Twine(DefIdx) + + " isn't a defined register"); + // Check that the tied-def operand wasn't tied elsewhere. + for (const auto &TiedPair : TiedRegisterPairs) { + if (TiedPair.first == DefIdx) + return error(Operands[I].Begin, + Twine("the tied-def operand #") + Twine(DefIdx) + + " is already tied with another register operand"); + } + TiedRegisterPairs.push_back(std::make_pair(DefIdx, I)); + } + // FIXME: Verify that for non INLINEASM instructions, the def and use tied + // indices must be less than tied max. 
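+  // In MIR a tied use is written as '<register>(tied-def N)'; each collected
+  // (def, use) pair is now applied with MachineInstr::tieOperands().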
+ for (const auto &TiedPair : TiedRegisterPairs) + MI.tieOperands(TiedPair.first, TiedPair.second); + return false; +} + +bool MIParser::parseRegisterOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx, + bool IsDef) { + unsigned Reg; + unsigned Flags = IsDef ? RegState::Define : 0; + while (Token.isRegisterFlag()) { + if (parseRegisterFlag(Flags)) + return true; + } + if (!Token.isRegister()) + return error("expected a register after register flags"); + if (parseRegister(Reg)) + return true; + lex(); + unsigned SubReg = 0; + if (Token.is(MIToken::colon)) { + if (parseSubRegisterIndex(SubReg)) + return true; + } + if ((Flags & RegState::Define) == 0 && consumeIfPresent(MIToken::lparen)) { + unsigned Idx; + if (parseRegisterTiedDefIndex(Idx)) + return true; + TiedDefIdx = Idx; + } + Dest = MachineOperand::CreateReg( + Reg, Flags & RegState::Define, Flags & RegState::Implicit, + Flags & RegState::Kill, Flags & RegState::Dead, Flags & RegState::Undef, + Flags & RegState::EarlyClobber, SubReg, Flags & RegState::Debug, + Flags & RegState::InternalRead); + return false; +} + +bool MIParser::parseImmediateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::IntegerLiteral)); + const APSInt &Int = Token.integerValue(); + if (Int.getMinSignedBits() > 64) + return error("integer literal is too large to be an immediate operand"); + Dest = MachineOperand::CreateImm(Int.getExtValue()); + lex(); + return false; +} + +bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue, + const Constant *&C) { + auto Source = StringValue.str(); // The source has to be null terminated. + SMDiagnostic Err; + C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(), + &IRSlots); + if (!C) + return error(Loc + Err.getColumnNo(), Err.getMessage()); + return false; +} + +bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) { + if (parseIRConstant(Loc, StringRef(Loc, Token.range().end() - Loc), C)) + return true; + lex(); + return false; +} + +bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::IntegerType)); + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal"); + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateCImm(cast<ConstantInt>(C)); + return false; +} + +bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) { + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::FloatingPointLiteral)) + return error("expected a floating point literal"); + const Constant *C = nullptr; + if (parseIRConstant(Loc, C)) + return true; + Dest = MachineOperand::CreateFPImm(cast<ConstantFP>(C)); + return false; +} + +bool MIParser::getUnsigned(unsigned &Result) { + assert(Token.hasIntegerValue() && "Expected a token with an integer value"); + const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1; + uint64_t Val64 = Token.integerValue().getLimitedValue(Limit); + if (Val64 == Limit) + return error("expected 32-bit integer (too large)"); + Result = Val64; + return false; +} + +bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) { + assert(Token.is(MIToken::MachineBasicBlock) || + Token.is(MIToken::MachineBasicBlockLabel)); + unsigned Number; + if (getUnsigned(Number)) + return true; + auto MBBInfo = PFS.MBBSlots.find(Number); + if (MBBInfo == PFS.MBBSlots.end()) + return error(Twine("use of undefined machine basic block #") + + Twine(Number)); + MBB 
= MBBInfo->second; + if (!Token.stringValue().empty() && Token.stringValue() != MBB->getName()) + return error(Twine("the name of machine basic block #") + Twine(Number) + + " isn't '" + Token.stringValue() + "'"); + return false; +} + +bool MIParser::parseMBBOperand(MachineOperand &Dest) { + MachineBasicBlock *MBB; + if (parseMBBReference(MBB)) + return true; + Dest = MachineOperand::CreateMBB(MBB); + lex(); + return false; +} + +bool MIParser::parseStackFrameIndex(int &FI) { + assert(Token.is(MIToken::StackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.StackObjectSlots.find(ID); + if (ObjectInfo == PFS.StackObjectSlots.end()) + return error(Twine("use of undefined stack object '%stack.") + Twine(ID) + + "'"); + StringRef Name; + if (const auto *Alloca = + MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second)) + Name = Alloca->getName(); + if (!Token.stringValue().empty() && Token.stringValue() != Name) + return error(Twine("the name of the stack object '%stack.") + Twine(ID) + + "' isn't '" + Token.stringValue() + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool MIParser::parseStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseFixedStackFrameIndex(int &FI) { + assert(Token.is(MIToken::FixedStackObject)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ObjectInfo = PFS.FixedStackObjectSlots.find(ID); + if (ObjectInfo == PFS.FixedStackObjectSlots.end()) + return error(Twine("use of undefined fixed stack object '%fixed-stack.") + + Twine(ID) + "'"); + lex(); + FI = ObjectInfo->second; + return false; +} + +bool MIParser::parseFixedStackObjectOperand(MachineOperand &Dest) { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + Dest = MachineOperand::CreateFI(FI); + return false; +} + +bool MIParser::parseGlobalValue(GlobalValue *&GV) { + switch (Token.kind()) { + case MIToken::NamedGlobalValue: { + const Module *M = MF.getFunction()->getParent(); + GV = M->getNamedValue(Token.stringValue()); + if (!GV) + return error(Twine("use of undefined global value '") + Token.range() + + "'"); + break; + } + case MIToken::GlobalValue: { + unsigned GVIdx; + if (getUnsigned(GVIdx)) + return true; + if (GVIdx >= IRSlots.GlobalValues.size()) + return error(Twine("use of undefined global value '@") + Twine(GVIdx) + + "'"); + GV = IRSlots.GlobalValues[GVIdx]; + break; + } + default: + llvm_unreachable("The current token should be a global value"); + } + return false; +} + +bool MIParser::parseGlobalAddressOperand(MachineOperand &Dest) { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + lex(); + Dest = MachineOperand::CreateGA(GV, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseConstantPoolIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ConstantPoolItem)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto ConstantInfo = PFS.ConstantPoolSlots.find(ID); + if (ConstantInfo == PFS.ConstantPoolSlots.end()) + return error("use of undefined constant '%const." 
+ Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateCPI(ID, /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseJumpTableIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::JumpTableIndex)); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto JumpTableEntryInfo = PFS.JumpTableSlots.find(ID); + if (JumpTableEntryInfo == PFS.JumpTableSlots.end()) + return error("use of undefined jump table '%jump-table." + Twine(ID) + "'"); + lex(); + Dest = MachineOperand::CreateJTI(JumpTableEntryInfo->second); + return false; +} + +bool MIParser::parseExternalSymbolOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::ExternalSymbol)); + const char *Symbol = MF.createExternalSymbolName(Token.stringValue()); + lex(); + Dest = MachineOperand::CreateES(Symbol); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseMDNode(MDNode *&Node) { + assert(Token.is(MIToken::exclaim)); + auto Loc = Token.location(); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected metadata id after '!'"); + unsigned ID; + if (getUnsigned(ID)) + return true; + auto NodeInfo = IRSlots.MetadataNodes.find(ID); + if (NodeInfo == IRSlots.MetadataNodes.end()) + return error(Loc, "use of undefined metadata '!" + Twine(ID) + "'"); + lex(); + Node = NodeInfo->second.get(); + return false; +} + +bool MIParser::parseMetadataOperand(MachineOperand &Dest) { + MDNode *Node = nullptr; + if (parseMDNode(Node)) + return true; + Dest = MachineOperand::CreateMetadata(Node); + return false; +} + +bool MIParser::parseCFIOffset(int &Offset) { + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected a cfi offset"); + if (Token.integerValue().getMinSignedBits() > 32) + return error("expected a 32 bit integer (the cfi offset is too large)"); + Offset = (int)Token.integerValue().getExtValue(); + lex(); + return false; +} + +bool MIParser::parseCFIRegister(unsigned &Reg) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a cfi register"); + unsigned LLVMReg; + if (parseRegister(LLVMReg)) + return true; + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + int DwarfReg = TRI->getDwarfRegNum(LLVMReg, true); + if (DwarfReg < 0) + return error("invalid DWARF register"); + Reg = (unsigned)DwarfReg; + lex(); + return false; +} + +bool MIParser::parseCFIOperand(MachineOperand &Dest) { + auto Kind = Token.kind(); + lex(); + auto &MMI = MF.getMMI(); + int Offset; + unsigned Reg; + unsigned CFIIndex; + switch (Kind) { + case MIToken::kw_cfi_same_value: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg)); + break; + case MIToken::kw_cfi_offset: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset)); + break; + case MIToken::kw_cfi_def_cfa_register: + if (parseCFIRegister(Reg)) + return true; + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); + break; + case MIToken::kw_cfi_def_cfa_offset: + if (parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfaOffset negates the offset. 
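+    // For example, an offset written as 16 in the MIR arrives here as
+    // Offset == 16; passing -16 cancels the negation that createDefCfaOffset
+    // performs internally, so the recorded frame instruction keeps the value
+    // as it was written.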
+ CFIIndex = MMI.addFrameInst( + MCCFIInstruction::createDefCfaOffset(nullptr, -Offset)); + break; + case MIToken::kw_cfi_def_cfa: + if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) || + parseCFIOffset(Offset)) + return true; + // NB: MCCFIInstruction::createDefCfa negates the offset. + CFIIndex = + MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset)); + break; + default: + // TODO: Parse the other CFI operands. + llvm_unreachable("The current token should be a cfi operand"); + } + Dest = MachineOperand::CreateCFIIndex(CFIIndex); + return false; +} + +bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) { + switch (Token.kind()) { + case MIToken::NamedIRBlock: { + BB = dyn_cast_or_null<BasicBlock>( + F.getValueSymbolTable().lookup(Token.stringValue())); + if (!BB) + return error(Twine("use of undefined IR block '") + Token.range() + "'"); + break; + } + case MIToken::IRBlock: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + BB = const_cast<BasicBlock *>(getIRBlock(SlotNumber, F)); + if (!BB) + return error(Twine("use of undefined IR block '%ir-block.") + + Twine(SlotNumber) + "'"); + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + return false; +} + +bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_blockaddress)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue)) + return error("expected a global value"); + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + auto *F = dyn_cast<Function>(GV); + if (!F) + return error("expected an IR function reference"); + lex(); + if (expectAndConsume(MIToken::comma)) + return true; + BasicBlock *BB = nullptr; + if (Token.isNot(MIToken::IRBlock) && Token.isNot(MIToken::NamedIRBlock)) + return error("expected an IR block reference"); + if (parseIRBlock(BB, *F)) + return true; + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateBA(BlockAddress::get(F, BB), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_target_index)); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target index"); + int Index = 0; + if (getTargetIndex(Token.stringValue(), Index)) + return error("use of undefined target index '" + Token.stringValue() + "'"); + lex(); + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateTargetIndex(unsigned(Index), /*Offset=*/0); + if (parseOperandsOffset(Dest)) + return true; + return false; +} + +bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) { + assert(Token.is(MIToken::kw_liveout)); + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + while (true) { + if (Token.isNot(MIToken::NamedRegister)) + return error("expected a named register"); + unsigned Reg = 0; + if (parseRegister(Reg)) + return true; + lex(); + Mask[Reg / 32] |= 1U << (Reg % 32); + // TODO: Report an error if the same register is used more than once. 
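+    // Each register sets one bit in the mask: Reg / 32 selects the 32-bit
+    // word and Reg % 32 the bit within it. A comma continues the register
+    // list (e.g. something like 'liveout(%rax, %rdx)' on an X86-like target);
+    // any other token ends it.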
+ if (Token.isNot(MIToken::comma)) + break; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = MachineOperand::CreateRegLiveOut(Mask); + return false; +} + +bool MIParser::parseMachineOperand(MachineOperand &Dest, + Optional<unsigned> &TiedDefIdx) { + switch (Token.kind()) { + case MIToken::kw_implicit: + case MIToken::kw_implicit_define: + case MIToken::kw_def: + case MIToken::kw_dead: + case MIToken::kw_killed: + case MIToken::kw_undef: + case MIToken::kw_internal: + case MIToken::kw_early_clobber: + case MIToken::kw_debug_use: + case MIToken::underscore: + case MIToken::NamedRegister: + case MIToken::VirtualRegister: + return parseRegisterOperand(Dest, TiedDefIdx); + case MIToken::IntegerLiteral: + return parseImmediateOperand(Dest); + case MIToken::IntegerType: + return parseTypedImmediateOperand(Dest); + case MIToken::kw_half: + case MIToken::kw_float: + case MIToken::kw_double: + case MIToken::kw_x86_fp80: + case MIToken::kw_fp128: + case MIToken::kw_ppc_fp128: + return parseFPImmediateOperand(Dest); + case MIToken::MachineBasicBlock: + return parseMBBOperand(Dest); + case MIToken::StackObject: + return parseStackObjectOperand(Dest); + case MIToken::FixedStackObject: + return parseFixedStackObjectOperand(Dest); + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: + return parseGlobalAddressOperand(Dest); + case MIToken::ConstantPoolItem: + return parseConstantPoolIndexOperand(Dest); + case MIToken::JumpTableIndex: + return parseJumpTableIndexOperand(Dest); + case MIToken::ExternalSymbol: + return parseExternalSymbolOperand(Dest); + case MIToken::exclaim: + return parseMetadataOperand(Dest); + case MIToken::kw_cfi_same_value: + case MIToken::kw_cfi_offset: + case MIToken::kw_cfi_def_cfa_register: + case MIToken::kw_cfi_def_cfa_offset: + case MIToken::kw_cfi_def_cfa: + return parseCFIOperand(Dest); + case MIToken::kw_blockaddress: + return parseBlockAddressOperand(Dest); + case MIToken::kw_target_index: + return parseTargetIndexOperand(Dest); + case MIToken::kw_liveout: + return parseLiveoutRegisterMaskOperand(Dest); + case MIToken::Error: + return true; + case MIToken::Identifier: + if (const auto *RegMask = getRegMask(Token.stringValue())) { + Dest = MachineOperand::CreateRegMask(RegMask); + lex(); + break; + } + // fallthrough + default: + // FIXME: Parse the MCSymbol machine operand. + return error("expected a machine operand"); + } + return false; +} + +bool MIParser::parseMachineOperandAndTargetFlags( + MachineOperand &Dest, Optional<unsigned> &TiedDefIdx) { + unsigned TF = 0; + bool HasTargetFlags = false; + if (Token.is(MIToken::kw_target_flags)) { + HasTargetFlags = true; + lex(); + if (expectAndConsume(MIToken::lparen)) + return true; + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + if (getDirectTargetFlag(Token.stringValue(), TF)) { + if (getBitmaskTargetFlag(Token.stringValue(), TF)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + } + lex(); + while (Token.is(MIToken::comma)) { + lex(); + if (Token.isNot(MIToken::Identifier)) + return error("expected the name of the target flag"); + unsigned BitFlag = 0; + if (getBitmaskTargetFlag(Token.stringValue(), BitFlag)) + return error("use of undefined target flag '" + Token.stringValue() + + "'"); + // TODO: Report an error when using a duplicate bit target flag. 
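+      // Every identifier after the first names a bitmask flag that is OR'd
+      // into the accumulated flags, so one direct flag can be combined with
+      // any number of bit flags inside a single target-flags(...) operand.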
+ TF |= BitFlag; + lex(); + } + if (expectAndConsume(MIToken::rparen)) + return true; + } + auto Loc = Token.location(); + if (parseMachineOperand(Dest, TiedDefIdx)) + return true; + if (!HasTargetFlags) + return false; + if (Dest.isReg()) + return error(Loc, "register operands can't have target flags"); + Dest.setTargetFlags(TF); + return false; +} + +bool MIParser::parseOffset(int64_t &Offset) { + if (Token.isNot(MIToken::plus) && Token.isNot(MIToken::minus)) + return false; + StringRef Sign = Token.range(); + bool IsNegative = Token.is(MIToken::minus); + lex(); + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected an integer literal after '" + Sign + "'"); + if (Token.integerValue().getMinSignedBits() > 64) + return error("expected 64-bit integer (too large)"); + Offset = Token.integerValue().getExtValue(); + if (IsNegative) + Offset = -Offset; + lex(); + return false; +} + +bool MIParser::parseAlignment(unsigned &Alignment) { + assert(Token.is(MIToken::kw_align)); + lex(); + if (Token.isNot(MIToken::IntegerLiteral) || Token.integerValue().isSigned()) + return error("expected an integer literal after 'align'"); + if (getUnsigned(Alignment)) + return true; + lex(); + return false; +} + +bool MIParser::parseOperandsOffset(MachineOperand &Op) { + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Op.setOffset(Offset); + return false; +} + +bool MIParser::parseIRValue(const Value *&V) { + switch (Token.kind()) { + case MIToken::NamedIRValue: { + V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue()); + break; + } + case MIToken::IRValue: { + unsigned SlotNumber = 0; + if (getUnsigned(SlotNumber)) + return true; + V = getIRValue(SlotNumber); + break; + } + case MIToken::NamedGlobalValue: + case MIToken::GlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + V = GV; + break; + } + case MIToken::QuotedIRValue: { + const Constant *C = nullptr; + if (parseIRConstant(Token.location(), Token.stringValue(), C)) + return true; + V = C; + break; + } + default: + llvm_unreachable("The current token should be an IR block reference"); + } + if (!V) + return error(Twine("use of undefined IR value '") + Token.range() + "'"); + return false; +} + +bool MIParser::getUint64(uint64_t &Result) { + assert(Token.hasIntegerValue()); + if (Token.integerValue().getActiveBits() > 64) + return error("expected 64-bit integer (too large)"); + Result = Token.integerValue().getZExtValue(); + return false; +} + +bool MIParser::parseMemoryOperandFlag(unsigned &Flags) { + const unsigned OldFlags = Flags; + switch (Token.kind()) { + case MIToken::kw_volatile: + Flags |= MachineMemOperand::MOVolatile; + break; + case MIToken::kw_non_temporal: + Flags |= MachineMemOperand::MONonTemporal; + break; + case MIToken::kw_invariant: + Flags |= MachineMemOperand::MOInvariant; + break; + // TODO: parse the target specific memory operand flags. + default: + llvm_unreachable("The current token should be a memory operand flag"); + } + if (OldFlags == Flags) + // We know that the same flag is specified more than once when the flags + // weren't modified. 
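+    // e.g. a memory operand spelled with 'volatile volatile load ...' would
+    // reach this point with MOVolatile already set and be rejected here.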
+ return error("duplicate '" + Token.stringValue() + "' memory operand flag"); + lex(); + return false; +} + +bool MIParser::parseMemoryPseudoSourceValue(const PseudoSourceValue *&PSV) { + switch (Token.kind()) { + case MIToken::kw_stack: + PSV = MF.getPSVManager().getStack(); + break; + case MIToken::kw_got: + PSV = MF.getPSVManager().getGOT(); + break; + case MIToken::kw_jump_table: + PSV = MF.getPSVManager().getJumpTable(); + break; + case MIToken::kw_constant_pool: + PSV = MF.getPSVManager().getConstantPool(); + break; + case MIToken::FixedStackObject: { + int FI; + if (parseFixedStackFrameIndex(FI)) + return true; + PSV = MF.getPSVManager().getFixedStack(FI); + // The token was already consumed, so use return here instead of break. + return false; + } + case MIToken::kw_call_entry: { + lex(); + switch (Token.kind()) { + case MIToken::GlobalValue: + case MIToken::NamedGlobalValue: { + GlobalValue *GV = nullptr; + if (parseGlobalValue(GV)) + return true; + PSV = MF.getPSVManager().getGlobalValueCallEntry(GV); + break; + } + case MIToken::ExternalSymbol: + PSV = MF.getPSVManager().getExternalSymbolCallEntry( + MF.createExternalSymbolName(Token.stringValue())); + break; + default: + return error( + "expected a global value or an external symbol after 'call-entry'"); + } + break; + } + default: + llvm_unreachable("The current token should be pseudo source value"); + } + lex(); + return false; +} + +bool MIParser::parseMachinePointerInfo(MachinePointerInfo &Dest) { + if (Token.is(MIToken::kw_constant_pool) || Token.is(MIToken::kw_stack) || + Token.is(MIToken::kw_got) || Token.is(MIToken::kw_jump_table) || + Token.is(MIToken::FixedStackObject) || Token.is(MIToken::kw_call_entry)) { + const PseudoSourceValue *PSV = nullptr; + if (parseMemoryPseudoSourceValue(PSV)) + return true; + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(PSV, Offset); + return false; + } + if (Token.isNot(MIToken::NamedIRValue) && Token.isNot(MIToken::IRValue) && + Token.isNot(MIToken::GlobalValue) && + Token.isNot(MIToken::NamedGlobalValue) && + Token.isNot(MIToken::QuotedIRValue)) + return error("expected an IR value reference"); + const Value *V = nullptr; + if (parseIRValue(V)) + return true; + if (!V->getType()->isPointerTy()) + return error("expected a pointer IR value"); + lex(); + int64_t Offset = 0; + if (parseOffset(Offset)) + return true; + Dest = MachinePointerInfo(V, Offset); + return false; +} + +bool MIParser::parseMachineMemoryOperand(MachineMemOperand *&Dest) { + if (expectAndConsume(MIToken::lparen)) + return true; + unsigned Flags = 0; + while (Token.isMemoryOperandFlag()) { + if (parseMemoryOperandFlag(Flags)) + return true; + } + if (Token.isNot(MIToken::Identifier) || + (Token.stringValue() != "load" && Token.stringValue() != "store")) + return error("expected 'load' or 'store' memory operation"); + if (Token.stringValue() == "load") + Flags |= MachineMemOperand::MOLoad; + else + Flags |= MachineMemOperand::MOStore; + lex(); + + if (Token.isNot(MIToken::IntegerLiteral)) + return error("expected the size integer literal after memory operation"); + uint64_t Size; + if (getUint64(Size)) + return true; + lex(); + + const char *Word = Flags & MachineMemOperand::MOLoad ? 
"from" : "into"; + if (Token.isNot(MIToken::Identifier) || Token.stringValue() != Word) + return error(Twine("expected '") + Word + "'"); + lex(); + + MachinePointerInfo Ptr = MachinePointerInfo(); + if (parseMachinePointerInfo(Ptr)) + return true; + unsigned BaseAlignment = Size; + AAMDNodes AAInfo; + MDNode *Range = nullptr; + while (consumeIfPresent(MIToken::comma)) { + switch (Token.kind()) { + case MIToken::kw_align: + if (parseAlignment(BaseAlignment)) + return true; + break; + case MIToken::md_tbaa: + lex(); + if (parseMDNode(AAInfo.TBAA)) + return true; + break; + case MIToken::md_alias_scope: + lex(); + if (parseMDNode(AAInfo.Scope)) + return true; + break; + case MIToken::md_noalias: + lex(); + if (parseMDNode(AAInfo.NoAlias)) + return true; + break; + case MIToken::md_range: + lex(); + if (parseMDNode(Range)) + return true; + break; + // TODO: Report an error on duplicate metadata nodes. + default: + return error("expected 'align' or '!tbaa' or '!alias.scope' or " + "'!noalias' or '!range'"); + } + } + if (expectAndConsume(MIToken::rparen)) + return true; + Dest = + MF.getMachineMemOperand(Ptr, Flags, Size, BaseAlignment, AAInfo, Range); + return false; +} + +void MIParser::initNames2InstrOpCodes() { + if (!Names2InstrOpCodes.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I) + Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I)); +} + +bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) { + initNames2InstrOpCodes(); + auto InstrInfo = Names2InstrOpCodes.find(InstrName); + if (InstrInfo == Names2InstrOpCodes.end()) + return true; + OpCode = InstrInfo->getValue(); + return false; +} + +void MIParser::initNames2Regs() { + if (!Names2Regs.empty()) + return; + // The '%noreg' register is the register 0. 
+ Names2Regs.insert(std::make_pair("noreg", 0)); + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) { + bool WasInserted = + Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I)) + .second; + (void)WasInserted; + assert(WasInserted && "Expected registers to be unique case-insensitively"); + } +} + +bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) { + initNames2Regs(); + auto RegInfo = Names2Regs.find(RegName); + if (RegInfo == Names2Regs.end()) + return true; + Reg = RegInfo->getValue(); + return false; +} + +void MIParser::initNames2RegMasks() { + if (!Names2RegMasks.empty()) + return; + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks(); + ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames(); + assert(RegMasks.size() == RegMaskNames.size()); + for (size_t I = 0, E = RegMasks.size(); I < E; ++I) + Names2RegMasks.insert( + std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I])); +} + +const uint32_t *MIParser::getRegMask(StringRef Identifier) { + initNames2RegMasks(); + auto RegMaskInfo = Names2RegMasks.find(Identifier); + if (RegMaskInfo == Names2RegMasks.end()) + return nullptr; + return RegMaskInfo->getValue(); +} + +void MIParser::initNames2SubRegIndices() { + if (!Names2SubRegIndices.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I) + Names2SubRegIndices.insert( + std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I)); +} + +unsigned MIParser::getSubRegIndex(StringRef Name) { + initNames2SubRegIndices(); + auto SubRegInfo = Names2SubRegIndices.find(Name); + if (SubRegInfo == Names2SubRegIndices.end()) + return 0; + return SubRegInfo->getValue(); +} + +static void initSlots2BasicBlocks( + const Function &F, + DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (auto &BB : F) { + if (BB.hasName()) + continue; + int Slot = MST.getLocalSlot(&BB); + if (Slot == -1) + continue; + Slots2BasicBlocks.insert(std::make_pair(unsigned(Slot), &BB)); + } +} + +static const BasicBlock *getIRBlockFromSlot( + unsigned Slot, + const DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) { + auto BlockInfo = Slots2BasicBlocks.find(Slot); + if (BlockInfo == Slots2BasicBlocks.end()) + return nullptr; + return BlockInfo->second; +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot) { + if (Slots2BasicBlocks.empty()) + initSlots2BasicBlocks(*MF.getFunction(), Slots2BasicBlocks); + return getIRBlockFromSlot(Slot, Slots2BasicBlocks); +} + +const BasicBlock *MIParser::getIRBlock(unsigned Slot, const Function &F) { + if (&F == MF.getFunction()) + return getIRBlock(Slot); + DenseMap<unsigned, const BasicBlock *> CustomSlots2BasicBlocks; + initSlots2BasicBlocks(F, CustomSlots2BasicBlocks); + return getIRBlockFromSlot(Slot, CustomSlots2BasicBlocks); +} + +static void mapValueToSlot(const Value *V, ModuleSlotTracker &MST, + DenseMap<unsigned, const Value *> &Slots2Values) { + int Slot = MST.getLocalSlot(V); + if (Slot == -1) + return; + Slots2Values.insert(std::make_pair(unsigned(Slot), V)); +} + +/// Creates the mapping from slot numbers to function's unnamed IR values. 
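+/// The slot numbers follow the local numbering that ModuleSlotTracker assigns
+/// when the function is printed (unnamed arguments, blocks and
+/// value-producing instructions, in order); the parser's numeric IR value
+/// references are resolved against this numbering.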
+static void initSlots2Values(const Function &F, + DenseMap<unsigned, const Value *> &Slots2Values) { + ModuleSlotTracker MST(F.getParent(), /*ShouldInitializeAllMetadata=*/false); + MST.incorporateFunction(F); + for (const auto &Arg : F.args()) + mapValueToSlot(&Arg, MST, Slots2Values); + for (const auto &BB : F) { + mapValueToSlot(&BB, MST, Slots2Values); + for (const auto &I : BB) + mapValueToSlot(&I, MST, Slots2Values); + } +} + +const Value *MIParser::getIRValue(unsigned Slot) { + if (Slots2Values.empty()) + initSlots2Values(*MF.getFunction(), Slots2Values); + auto ValueInfo = Slots2Values.find(Slot); + if (ValueInfo == Slots2Values.end()) + return nullptr; + return ValueInfo->second; +} + +void MIParser::initNames2TargetIndices() { + if (!Names2TargetIndices.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) + Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getTargetIndex(StringRef Name, int &Index) { + initNames2TargetIndices(); + auto IndexInfo = Names2TargetIndices.find(Name); + if (IndexInfo == Names2TargetIndices.end()) + return true; + Index = IndexInfo->second; + return false; +} + +void MIParser::initNames2DirectTargetFlags() { + if (!Names2DirectTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2DirectTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) { + initNames2DirectTargetFlags(); + auto FlagInfo = Names2DirectTargetFlags.find(Name); + if (FlagInfo == Names2DirectTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +void MIParser::initNames2BitmaskTargetFlags() { + if (!Names2BitmaskTargetFlags.empty()) + return; + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "Expected target instruction info"); + auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &I : Flags) + Names2BitmaskTargetFlags.insert( + std::make_pair(StringRef(I.second), I.first)); +} + +bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) { + initNames2BitmaskTargetFlags(); + auto FlagInfo = Names2BitmaskTargetFlags.find(Name); + if (FlagInfo == Names2BitmaskTargetFlags.end()) + return true; + Flag = FlagInfo->second; + return false; +} + +bool llvm::parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, + PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + SourceMgr SM; + SM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), + SMLoc()); + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseBasicBlockDefinitions(PFS.MBBSlots); +} + +bool llvm::parseMachineInstructions(MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + SourceMgr SM; + SM.AddNewSourceBuffer( + MemoryBuffer::getMemBuffer(Src, "", /*RequiresNullTerminator=*/false), + SMLoc()); + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseBasicBlocks(); +} + +bool llvm::parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const 
PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMBB(MBB); +} + +bool llvm::parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneNamedRegister(Reg); +} + +bool llvm::parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneVirtualRegister(Reg); +} + +bool llvm::parseStackObjectReference(int &FI, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots) + .parseStandaloneStackObject(FI); +} + +bool llvm::parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, + StringRef Src, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error) { + return MIParser(SM, MF, Error, Src, PFS, IRSlots).parseStandaloneMDNode(Node); +} diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h new file mode 100644 index 0000000..8aef704 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h @@ -0,0 +1,99 @@ +//===- MIParser.h - Machine Instructions Parser ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the function that parses the machine instructions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H +#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" + +namespace llvm { + +class BasicBlock; +class MachineBasicBlock; +class MachineInstr; +class MachineFunction; +class MDNode; +struct SlotMapping; +class SMDiagnostic; +class SourceMgr; + +struct PerFunctionMIParsingState { + DenseMap<unsigned, MachineBasicBlock *> MBBSlots; + DenseMap<unsigned, unsigned> VirtualRegisterSlots; + DenseMap<unsigned, int> FixedStackObjectSlots; + DenseMap<unsigned, int> StackObjectSlots; + DenseMap<unsigned, unsigned> ConstantPoolSlots; + DenseMap<unsigned, unsigned> JumpTableSlots; +}; + +/// Parse the machine basic block definitions, and skip the machine +/// instructions. +/// +/// This function runs the first parsing pass on the machine function's body. +/// It parses only the machine basic block definitions and creates the machine +/// basic blocks in the given machine function. +/// +/// The machine instructions aren't parsed during the first pass because all +/// the machine basic blocks aren't defined yet - this makes it impossible to +/// resolve the machine basic block references. +/// +/// Return true if an error occurred. +bool parseMachineBasicBlockDefinitions(MachineFunction &MF, StringRef Src, + PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + +/// Parse the machine instructions. 
+/// +/// This function runs the second parsing pass on the machine function's body. +/// It skips the machine basic block definitions and parses only the machine +/// instructions and basic block attributes like liveins and successors. +/// +/// The second parsing pass assumes that the first parsing pass already ran +/// on the given source string. +/// +/// Return true if an error occurred. +bool parseMachineInstructions(MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + +bool parseMBBReference(MachineBasicBlock *&MBB, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + +bool parseNamedRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + +bool parseVirtualRegisterReference(unsigned &Reg, SourceMgr &SM, + MachineFunction &MF, StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, + SMDiagnostic &Error); + +bool parseStackObjectReference(int &FI, SourceMgr &SM, MachineFunction &MF, + StringRef Src, + const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + +bool parseMDNode(MDNode *&Node, SourceMgr &SM, MachineFunction &MF, + StringRef Src, const PerFunctionMIParsingState &PFS, + const SlotMapping &IRSlots, SMDiagnostic &Error); + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp new file mode 100644 index 0000000..422efbc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp @@ -0,0 +1,739 @@ +//===- MIRParser.cpp - MIR serialization format parser implementation -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that parses the optional LLVM IR and machine +// functions that are stored in MIR files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MIRParser/MIRParser.h" +#include "MIParser.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/AsmParser/SlotMapping.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/SMLoc.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/YAMLTraits.h" +#include <memory> + +using namespace llvm; + +namespace llvm { + +/// This class implements the parsing of LLVM IR that's embedded inside a MIR +/// file. 
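+///
+/// A MIR file is a sequence of YAML documents: an optional block scalar
+/// holding the LLVM IR module, followed by one document per machine function.
+/// As a rough, hypothetical illustration, an input could look like:
+///
+///   --- |
+///     define void @foo() {
+///     entry:
+///       ret void
+///     }
+///   ...
+///   ---
+///   name: foo
+///   body: |
+///     bb.0.entry:
+///   ...
+///
+/// parse() consumes the IR document first (if present) and then hands each
+/// remaining document to parseMachineFunction().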
+class MIRParserImpl { + SourceMgr SM; + StringRef Filename; + LLVMContext &Context; + StringMap<std::unique_ptr<yaml::MachineFunction>> Functions; + SlotMapping IRSlots; + /// Maps from register class names to register classes. + StringMap<const TargetRegisterClass *> Names2RegClasses; + +public: + MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, StringRef Filename, + LLVMContext &Context); + + void reportDiagnostic(const SMDiagnostic &Diag); + + /// Report an error with the given message at unknown location. + /// + /// Always returns true. + bool error(const Twine &Message); + + /// Report an error with the given message at the given location. + /// + /// Always returns true. + bool error(SMLoc Loc, const Twine &Message); + + /// Report a given error with the location translated from the location in an + /// embedded string literal to a location in the MIR file. + /// + /// Always returns true. + bool error(const SMDiagnostic &Error, SMRange SourceRange); + + /// Try to parse the optional LLVM module and the machine functions in the MIR + /// file. + /// + /// Return null if an error occurred. + std::unique_ptr<Module> parse(); + + /// Parse the machine function in the current YAML document. + /// + /// \param NoLLVMIR - set to true when the MIR file doesn't have LLVM IR. + /// A dummy IR function is created and inserted into the given module when + /// this parameter is true. + /// + /// Return true if an error occurred. + bool parseMachineFunction(yaml::Input &In, Module &M, bool NoLLVMIR); + + /// Initialize the machine function to the state that's described in the MIR + /// file. + /// + /// Return true if error occurred. + bool initializeMachineFunction(MachineFunction &MF); + + bool initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS); + + void inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF); + + bool initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS); + + bool parseCalleeSavedRegister(MachineFunction &MF, + PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, + int FrameIdx); + + bool parseStackObjectsDebugInfo(MachineFunction &MF, + PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, + int FrameIdx); + + bool initializeConstantPool(MachineConstantPool &ConstantPool, + const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap<unsigned, unsigned> &ConstantPoolSlots); + + bool initializeJumpTableInfo(MachineFunction &MF, + const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS); + +private: + bool parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, const PerFunctionMIParsingState &PFS); + + bool parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, MachineFunction &MF, + const PerFunctionMIParsingState &PFS); + + /// Return a MIR diagnostic converted from an MI string diagnostic. + SMDiagnostic diagFromMIStringDiag(const SMDiagnostic &Error, + SMRange SourceRange); + + /// Return a MIR diagnostic converted from a diagnostic located in a YAML + /// block scalar string. + SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange); + + /// Create an empty function with the given name. 
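+  /// This is used when the MIR file carries no LLVM IR, so that every machine
+  /// function parsed from the file still has an IR function to attach to.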
+ void createDummyFunction(StringRef Name, Module &M); + + void initNames2RegClasses(const MachineFunction &MF); + + /// Check if the given identifier is a name of a register class. + /// + /// Return null if the name isn't a register class. + const TargetRegisterClass *getRegClass(const MachineFunction &MF, + StringRef Name); +}; + +} // end namespace llvm + +MIRParserImpl::MIRParserImpl(std::unique_ptr<MemoryBuffer> Contents, + StringRef Filename, LLVMContext &Context) + : SM(), Filename(Filename), Context(Context) { + SM.AddNewSourceBuffer(std::move(Contents), SMLoc()); +} + +bool MIRParserImpl::error(const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SMDiagnostic(Filename, SourceMgr::DK_Error, Message.str()))); + return true; +} + +bool MIRParserImpl::error(SMLoc Loc, const Twine &Message) { + Context.diagnose(DiagnosticInfoMIRParser( + DS_Error, SM.GetMessage(Loc, SourceMgr::DK_Error, Message))); + return true; +} + +bool MIRParserImpl::error(const SMDiagnostic &Error, SMRange SourceRange) { + assert(Error.getKind() == SourceMgr::DK_Error && "Expected an error"); + reportDiagnostic(diagFromMIStringDiag(Error, SourceRange)); + return true; +} + +void MIRParserImpl::reportDiagnostic(const SMDiagnostic &Diag) { + DiagnosticSeverity Kind; + switch (Diag.getKind()) { + case SourceMgr::DK_Error: + Kind = DS_Error; + break; + case SourceMgr::DK_Warning: + Kind = DS_Warning; + break; + case SourceMgr::DK_Note: + Kind = DS_Note; + break; + } + Context.diagnose(DiagnosticInfoMIRParser(Kind, Diag)); +} + +static void handleYAMLDiag(const SMDiagnostic &Diag, void *Context) { + reinterpret_cast<MIRParserImpl *>(Context)->reportDiagnostic(Diag); +} + +std::unique_ptr<Module> MIRParserImpl::parse() { + yaml::Input In(SM.getMemoryBuffer(SM.getMainFileID())->getBuffer(), + /*Ctxt=*/nullptr, handleYAMLDiag, this); + In.setContext(&In); + + if (!In.setCurrentDocument()) { + if (In.error()) + return nullptr; + // Create an empty module when the MIR file is empty. + return llvm::make_unique<Module>(Filename, Context); + } + + std::unique_ptr<Module> M; + bool NoLLVMIR = false; + // Parse the block scalar manually so that we can return unique pointer + // without having to go trough YAML traits. + if (const auto *BSN = + dyn_cast_or_null<yaml::BlockScalarNode>(In.getCurrentNode())) { + SMDiagnostic Error; + M = parseAssembly(MemoryBufferRef(BSN->getValue(), Filename), Error, + Context, &IRSlots); + if (!M) { + reportDiagnostic(diagFromBlockStringDiag(Error, BSN->getSourceRange())); + return M; + } + In.nextDocument(); + if (!In.setCurrentDocument()) + return M; + } else { + // Create an new, empty module. + M = llvm::make_unique<Module>(Filename, Context); + NoLLVMIR = true; + } + + // Parse the machine functions. 
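+  // Each remaining document is expected to describe exactly one machine
+  // function; the loop stops and reports failure at the first document that
+  // does not parse.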
+ do { + if (parseMachineFunction(In, *M, NoLLVMIR)) + return nullptr; + In.nextDocument(); + } while (In.setCurrentDocument()); + + return M; +} + +bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M, + bool NoLLVMIR) { + auto MF = llvm::make_unique<yaml::MachineFunction>(); + yaml::yamlize(In, *MF, false); + if (In.error()) + return true; + auto FunctionName = MF->Name; + if (Functions.find(FunctionName) != Functions.end()) + return error(Twine("redefinition of machine function '") + FunctionName + + "'"); + Functions.insert(std::make_pair(FunctionName, std::move(MF))); + if (NoLLVMIR) + createDummyFunction(FunctionName, M); + else if (!M.getFunction(FunctionName)) + return error(Twine("function '") + FunctionName + + "' isn't defined in the provided LLVM IR"); + return false; +} + +void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) { + auto &Context = M.getContext(); + Function *F = cast<Function>(M.getOrInsertFunction( + Name, FunctionType::get(Type::getVoidTy(Context), false))); + BasicBlock *BB = BasicBlock::Create(Context, "entry", F); + new UnreachableInst(Context, BB); +} + +bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) { + auto It = Functions.find(MF.getName()); + if (It == Functions.end()) + return error(Twine("no machine function information for function '") + + MF.getName() + "' in the MIR file"); + // TODO: Recreate the machine function. + const yaml::MachineFunction &YamlMF = *It->getValue(); + if (YamlMF.Alignment) + MF.setAlignment(YamlMF.Alignment); + MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice); + MF.setHasInlineAsm(YamlMF.HasInlineAsm); + PerFunctionMIParsingState PFS; + if (initializeRegisterInfo(MF, YamlMF, PFS)) + return true; + if (!YamlMF.Constants.empty()) { + auto *ConstantPool = MF.getConstantPool(); + assert(ConstantPool && "Constant pool must be created"); + if (initializeConstantPool(*ConstantPool, YamlMF, MF, + PFS.ConstantPoolSlots)) + return true; + } + + SMDiagnostic Error; + if (parseMachineBasicBlockDefinitions(MF, YamlMF.Body.Value.Value, PFS, + IRSlots, Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; + } + + if (MF.empty()) + return error(Twine("machine function '") + Twine(MF.getName()) + + "' requires at least one machine basic block in its body"); + // Initialize the frame information after creating all the MBBs so that the + // MBB references in the frame information can be resolved. + if (initializeFrameInfo(MF, YamlMF, PFS)) + return true; + // Initialize the jump table after creating all the MBBs so that the MBB + // references can be resolved. + if (!YamlMF.JumpTableInfo.Entries.empty() && + initializeJumpTableInfo(MF, YamlMF.JumpTableInfo, PFS)) + return true; + // Parse the machine instructions after creating all of the MBBs so that the + // parser can resolve the MBB references. + if (parseMachineInstructions(MF, YamlMF.Body.Value.Value, PFS, IRSlots, + Error)) { + reportDiagnostic( + diagFromBlockStringDiag(Error, YamlMF.Body.Value.SourceRange)); + return true; + } + inferRegisterInfo(MF, YamlMF); + // FIXME: This is a temporary workaround until the reserved registers can be + // serialized. 
+ MF.getRegInfo().freezeReservedRegs(MF); + MF.verify(); + return false; +} + +bool MIRParserImpl::initializeRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + assert(RegInfo.isSSA()); + if (!YamlMF.IsSSA) + RegInfo.leaveSSA(); + assert(RegInfo.tracksLiveness()); + if (!YamlMF.TracksRegLiveness) + RegInfo.invalidateLiveness(); + RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness); + + SMDiagnostic Error; + // Parse the virtual register information. + for (const auto &VReg : YamlMF.VirtualRegisters) { + const auto *RC = getRegClass(MF, VReg.Class.Value); + if (!RC) + return error(VReg.Class.SourceRange.Start, + Twine("use of undefined register class '") + + VReg.Class.Value + "'"); + unsigned Reg = RegInfo.createVirtualRegister(RC); + if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg)) + .second) + return error(VReg.ID.SourceRange.Start, + Twine("redefinition of virtual register '%") + + Twine(VReg.ID.Value) + "'"); + if (!VReg.PreferredRegister.Value.empty()) { + unsigned PreferredReg = 0; + if (parseNamedRegisterReference(PreferredReg, SM, MF, + VReg.PreferredRegister.Value, PFS, + IRSlots, Error)) + return error(Error, VReg.PreferredRegister.SourceRange); + RegInfo.setSimpleHint(Reg, PreferredReg); + } + } + + // Parse the liveins. + for (const auto &LiveIn : YamlMF.LiveIns) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, LiveIn.Register.Value, PFS, + IRSlots, Error)) + return error(Error, LiveIn.Register.SourceRange); + unsigned VReg = 0; + if (!LiveIn.VirtualRegister.Value.empty()) { + if (parseVirtualRegisterReference( + VReg, SM, MF, LiveIn.VirtualRegister.Value, PFS, IRSlots, Error)) + return error(Error, LiveIn.VirtualRegister.SourceRange); + } + RegInfo.addLiveIn(Reg, VReg); + } + + // Parse the callee saved register mask. 
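+  // The YAML lists the registers that are preserved; flipping the bit vector
+  // before it is handed to setUsedPhysRegMask records the complementary set,
+  // i.e. the registers that are not callee saved.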
+ BitVector CalleeSavedRegisterMask(RegInfo.getUsedPhysRegsMask().size()); + if (!YamlMF.CalleeSavedRegisters) + return false; + for (const auto &RegSource : YamlMF.CalleeSavedRegisters.getValue()) { + unsigned Reg = 0; + if (parseNamedRegisterReference(Reg, SM, MF, RegSource.Value, PFS, IRSlots, + Error)) + return error(Error, RegSource.SourceRange); + CalleeSavedRegisterMask[Reg] = true; + } + RegInfo.setUsedPhysRegMask(CalleeSavedRegisterMask.flip()); + return false; +} + +void MIRParserImpl::inferRegisterInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF) { + if (YamlMF.CalleeSavedRegisters) + return; + for (const MachineBasicBlock &MBB : MF) { + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isRegMask()) + continue; + MF.getRegInfo().addPhysRegsUsedFromRegMask(MO.getRegMask()); + } + } + } +} + +bool MIRParserImpl::initializeFrameInfo(MachineFunction &MF, + const yaml::MachineFunction &YamlMF, + PerFunctionMIParsingState &PFS) { + MachineFrameInfo &MFI = *MF.getFrameInfo(); + const Function &F = *MF.getFunction(); + const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo; + MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken); + MFI.setReturnAddressIsTaken(YamlMFI.IsReturnAddressTaken); + MFI.setHasStackMap(YamlMFI.HasStackMap); + MFI.setHasPatchPoint(YamlMFI.HasPatchPoint); + MFI.setStackSize(YamlMFI.StackSize); + MFI.setOffsetAdjustment(YamlMFI.OffsetAdjustment); + if (YamlMFI.MaxAlignment) + MFI.ensureMaxAlignment(YamlMFI.MaxAlignment); + MFI.setAdjustsStack(YamlMFI.AdjustsStack); + MFI.setHasCalls(YamlMFI.HasCalls); + MFI.setMaxCallFrameSize(YamlMFI.MaxCallFrameSize); + MFI.setHasOpaqueSPAdjustment(YamlMFI.HasOpaqueSPAdjustment); + MFI.setHasVAStart(YamlMFI.HasVAStart); + MFI.setHasMustTailInVarArgFunc(YamlMFI.HasMustTailInVarArgFunc); + if (!YamlMFI.SavePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.SavePoint, MF, PFS)) + return true; + MFI.setSavePoint(MBB); + } + if (!YamlMFI.RestorePoint.Value.empty()) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, YamlMFI.RestorePoint, MF, PFS)) + return true; + MFI.setRestorePoint(MBB); + } + + std::vector<CalleeSavedInfo> CSIInfo; + // Initialize the fixed frame objects. + for (const auto &Object : YamlMF.FixedStackObjects) { + int ObjectIdx; + if (Object.Type != yaml::FixedMachineStackObject::SpillSlot) + ObjectIdx = MFI.CreateFixedObject(Object.Size, Object.Offset, + Object.IsImmutable, Object.IsAliased); + else + ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset); + MFI.setObjectAlignment(ObjectIdx, Object.Alignment); + if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value, + ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of fixed stack object '%fixed-stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; + } + + // Initialize the ordinary frame objects. 
+ for (const auto &Object : YamlMF.StackObjects) { + int ObjectIdx; + const AllocaInst *Alloca = nullptr; + const yaml::StringValue &Name = Object.Name; + if (!Name.Value.empty()) { + Alloca = dyn_cast_or_null<AllocaInst>( + F.getValueSymbolTable().lookup(Name.Value)); + if (!Alloca) + return error(Name.SourceRange.Start, + "alloca instruction named '" + Name.Value + + "' isn't defined in the function '" + F.getName() + + "'"); + } + if (Object.Type == yaml::MachineStackObject::VariableSized) + ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca); + else + ObjectIdx = MFI.CreateStackObject( + Object.Size, Object.Alignment, + Object.Type == yaml::MachineStackObject::SpillSlot, Alloca); + MFI.setObjectOffset(ObjectIdx, Object.Offset); + if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx)) + .second) + return error(Object.ID.SourceRange.Start, + Twine("redefinition of stack object '%stack.") + + Twine(Object.ID.Value) + "'"); + if (parseCalleeSavedRegister(MF, PFS, CSIInfo, Object.CalleeSavedRegister, + ObjectIdx)) + return true; + if (Object.LocalOffset) + MFI.mapLocalFrameObject(ObjectIdx, Object.LocalOffset.getValue()); + if (parseStackObjectsDebugInfo(MF, PFS, Object, ObjectIdx)) + return true; + } + MFI.setCalleeSavedInfo(CSIInfo); + if (!CSIInfo.empty()) + MFI.setCalleeSavedInfoValid(true); + + // Initialize the various stack object references after initializing the + // stack objects. + if (!YamlMFI.StackProtector.Value.empty()) { + SMDiagnostic Error; + int FI; + if (parseStackObjectReference(FI, SM, MF, YamlMFI.StackProtector.Value, PFS, + IRSlots, Error)) + return error(Error, YamlMFI.StackProtector.SourceRange); + MFI.setStackProtectorIndex(FI); + } + return false; +} + +bool MIRParserImpl::parseCalleeSavedRegister( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + std::vector<CalleeSavedInfo> &CSIInfo, + const yaml::StringValue &RegisterSource, int FrameIdx) { + if (RegisterSource.Value.empty()) + return false; + unsigned Reg = 0; + SMDiagnostic Error; + if (parseNamedRegisterReference(Reg, SM, MF, RegisterSource.Value, PFS, + IRSlots, Error)) + return error(Error, RegisterSource.SourceRange); + CSIInfo.push_back(CalleeSavedInfo(Reg, FrameIdx)); + return false; +} + +/// Verify that given node is of a certain type. Return true on error. +template <typename T> +static bool typecheckMDNode(T *&Result, MDNode *Node, + const yaml::StringValue &Source, + StringRef TypeString, MIRParserImpl &Parser) { + if (!Node) + return false; + Result = dyn_cast<T>(Node); + if (!Result) + return Parser.error(Source.SourceRange.Start, + "expected a reference to a '" + TypeString + + "' metadata node"); + return false; +} + +bool MIRParserImpl::parseStackObjectsDebugInfo( + MachineFunction &MF, PerFunctionMIParsingState &PFS, + const yaml::MachineStackObject &Object, int FrameIdx) { + // Debug information can only be attached to stack objects; Fixed stack + // objects aren't supported. 
+ assert(FrameIdx >= 0 && "Expected a stack object frame index"); + MDNode *Var = nullptr, *Expr = nullptr, *Loc = nullptr; + if (parseMDNode(Var, Object.DebugVar, MF, PFS) || + parseMDNode(Expr, Object.DebugExpr, MF, PFS) || + parseMDNode(Loc, Object.DebugLoc, MF, PFS)) + return true; + if (!Var && !Expr && !Loc) + return false; + DILocalVariable *DIVar = nullptr; + DIExpression *DIExpr = nullptr; + DILocation *DILoc = nullptr; + if (typecheckMDNode(DIVar, Var, Object.DebugVar, "DILocalVariable", *this) || + typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) || + typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this)) + return true; + MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc); + return false; +} + +bool MIRParserImpl::parseMDNode(MDNode *&Node, const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + if (Source.Value.empty()) + return false; + SMDiagnostic Error; + if (llvm::parseMDNode(Node, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + +bool MIRParserImpl::initializeConstantPool( + MachineConstantPool &ConstantPool, const yaml::MachineFunction &YamlMF, + const MachineFunction &MF, + DenseMap<unsigned, unsigned> &ConstantPoolSlots) { + const auto &M = *MF.getFunction()->getParent(); + SMDiagnostic Error; + for (const auto &YamlConstant : YamlMF.Constants) { + const Constant *Value = dyn_cast_or_null<Constant>( + parseConstantValue(YamlConstant.Value.Value, Error, M)); + if (!Value) + return error(Error, YamlConstant.Value.SourceRange); + unsigned Alignment = + YamlConstant.Alignment + ? YamlConstant.Alignment + : M.getDataLayout().getPrefTypeAlignment(Value->getType()); + unsigned Index = ConstantPool.getConstantPoolIndex(Value, Alignment); + if (!ConstantPoolSlots.insert(std::make_pair(YamlConstant.ID.Value, Index)) + .second) + return error(YamlConstant.ID.SourceRange.Start, + Twine("redefinition of constant pool item '%const.") + + Twine(YamlConstant.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::initializeJumpTableInfo( + MachineFunction &MF, const yaml::MachineJumpTable &YamlJTI, + PerFunctionMIParsingState &PFS) { + MachineJumpTableInfo *JTI = MF.getOrCreateJumpTableInfo(YamlJTI.Kind); + for (const auto &Entry : YamlJTI.Entries) { + std::vector<MachineBasicBlock *> Blocks; + for (const auto &MBBSource : Entry.Blocks) { + MachineBasicBlock *MBB = nullptr; + if (parseMBBReference(MBB, MBBSource.Value, MF, PFS)) + return true; + Blocks.push_back(MBB); + } + unsigned Index = JTI->createJumpTableIndex(Blocks); + if (!PFS.JumpTableSlots.insert(std::make_pair(Entry.ID.Value, Index)) + .second) + return error(Entry.ID.SourceRange.Start, + Twine("redefinition of jump table entry '%jump-table.") + + Twine(Entry.ID.Value) + "'"); + } + return false; +} + +bool MIRParserImpl::parseMBBReference(MachineBasicBlock *&MBB, + const yaml::StringValue &Source, + MachineFunction &MF, + const PerFunctionMIParsingState &PFS) { + SMDiagnostic Error; + if (llvm::parseMBBReference(MBB, SM, MF, Source.Value, PFS, IRSlots, Error)) + return error(Error, Source.SourceRange); + return false; +} + +SMDiagnostic MIRParserImpl::diagFromMIStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { + assert(SourceRange.isValid() && "Invalid source range"); + SMLoc Loc = SourceRange.Start; + bool HasQuote = Loc.getPointer() < SourceRange.End.getPointer() && + *Loc.getPointer() == '\''; + // Translate the location of the error from the 
location in the MI string to + // the corresponding location in the MIR file. + Loc = Loc.getFromPointer(Loc.getPointer() + Error.getColumnNo() + + (HasQuote ? 1 : 0)); + + // TODO: Translate any source ranges as well. + return SM.GetMessage(Loc, Error.getKind(), Error.getMessage(), None, + Error.getFixIts()); +} + +SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error, + SMRange SourceRange) { + assert(SourceRange.isValid()); + + // Translate the location of the error from the location in the llvm IR string + // to the corresponding location in the MIR file. + auto LineAndColumn = SM.getLineAndColumn(SourceRange.Start); + unsigned Line = LineAndColumn.first + Error.getLineNo() - 1; + unsigned Column = Error.getColumnNo(); + StringRef LineStr = Error.getLineContents(); + SMLoc Loc = Error.getLoc(); + + // Get the full line and adjust the column number by taking the indentation of + // LLVM IR into account. + for (line_iterator L(*SM.getMemoryBuffer(SM.getMainFileID()), false), E; + L != E; ++L) { + if (L.line_number() == Line) { + LineStr = *L; + Loc = SMLoc::getFromPointer(LineStr.data()); + auto Indent = LineStr.find(Error.getLineContents()); + if (Indent != StringRef::npos) + Column += Indent; + break; + } + } + + return SMDiagnostic(SM, Loc, Filename, Line, Column, Error.getKind(), + Error.getMessage(), LineStr, Error.getRanges(), + Error.getFixIts()); +} + +void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) { + if (!Names2RegClasses.empty()) + return; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) { + const auto *RC = TRI->getRegClass(I); + Names2RegClasses.insert( + std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC)); + } +} + +const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF, + StringRef Name) { + initNames2RegClasses(MF); + auto RegClassInfo = Names2RegClasses.find(Name); + if (RegClassInfo == Names2RegClasses.end()) + return nullptr; + return RegClassInfo->getValue(); +} + +MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl) + : Impl(std::move(Impl)) {} + +MIRParser::~MIRParser() {} + +std::unique_ptr<Module> MIRParser::parseLLVMModule() { return Impl->parse(); } + +bool MIRParser::initializeMachineFunction(MachineFunction &MF) { + return Impl->initializeMachineFunction(MF); +} + +std::unique_ptr<MIRParser> llvm::createMIRParserFromFile(StringRef Filename, + SMDiagnostic &Error, + LLVMContext &Context) { + auto FileOrErr = MemoryBuffer::getFile(Filename); + if (std::error_code EC = FileOrErr.getError()) { + Error = SMDiagnostic(Filename, SourceMgr::DK_Error, + "Could not open input file: " + EC.message()); + return nullptr; + } + return createMIRParser(std::move(FileOrErr.get()), Context); +} + +std::unique_ptr<MIRParser> +llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents, + LLVMContext &Context) { + auto Filename = Contents->getBufferIdentifier(); + return llvm::make_unique<MIRParser>( + llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context)); +} diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp new file mode 100644 index 0000000..175cb0d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp @@ -0,0 +1,979 @@ +//===- MIRPrinter.cpp - MIR serialization format printer ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that prints out the LLVM IR and machine +// functions using the MIR serialization format. +// +//===----------------------------------------------------------------------===// + +#include "MIRPrinter.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/YAMLTraits.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +namespace { + +/// This structure describes how to print out stack object references. +struct FrameIndexOperand { + std::string Name; + unsigned ID; + bool IsFixed; + + FrameIndexOperand(StringRef Name, unsigned ID, bool IsFixed) + : Name(Name.str()), ID(ID), IsFixed(IsFixed) {} + + /// Return an ordinary stack object reference. + static FrameIndexOperand create(StringRef Name, unsigned ID) { + return FrameIndexOperand(Name, ID, /*IsFixed=*/false); + } + + /// Return a fixed stack object reference. + static FrameIndexOperand createFixed(unsigned ID) { + return FrameIndexOperand("", ID, /*IsFixed=*/true); + } +}; + +} // end anonymous namespace + +namespace llvm { + +/// This class prints out the machine functions using the MIR serialization +/// format. +class MIRPrinter { + raw_ostream &OS; + DenseMap<const uint32_t *, unsigned> RegisterMaskIds; + /// Maps from stack object indices to operand indices which will be used when + /// printing frame index machine operands. + DenseMap<int, FrameIndexOperand> StackObjectOperandMapping; + +public: + MIRPrinter(raw_ostream &OS) : OS(OS) {} + + void print(const MachineFunction &MF); + + void convert(yaml::MachineFunction &MF, const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI); + void convert(ModuleSlotTracker &MST, yaml::MachineFrameInfo &YamlMFI, + const MachineFrameInfo &MFI); + void convert(yaml::MachineFunction &MF, + const MachineConstantPool &ConstantPool); + void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI, + const MachineJumpTableInfo &JTI); + void convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI, MachineModuleInfo &MMI, + ModuleSlotTracker &MST, + const TargetRegisterInfo *TRI); + +private: + void initRegisterMaskIds(const MachineFunction &MF); +}; + +/// This class prints out the machine instructions using the MIR serialization +/// format. 
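+///
+/// For illustration only (abridged; exact spelling varies by target and by
+/// LLVM version), the printed form of a block and its instructions looks
+/// roughly like:
+///
+///   bb.0.entry:
+///     liveins: %edi
+///
+///     %0 = COPY %edi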
+class MIPrinter { + raw_ostream &OS; + ModuleSlotTracker &MST; + const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds; + const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping; + +public: + MIPrinter(raw_ostream &OS, ModuleSlotTracker &MST, + const DenseMap<const uint32_t *, unsigned> &RegisterMaskIds, + const DenseMap<int, FrameIndexOperand> &StackObjectOperandMapping) + : OS(OS), MST(MST), RegisterMaskIds(RegisterMaskIds), + StackObjectOperandMapping(StackObjectOperandMapping) {} + + void print(const MachineBasicBlock &MBB); + + void print(const MachineInstr &MI); + void printMBBReference(const MachineBasicBlock &MBB); + void printIRBlockReference(const BasicBlock &BB); + void printIRValueReference(const Value &V); + void printStackObjectReference(int FrameIndex); + void printOffset(int64_t Offset); + void printTargetFlags(const MachineOperand &Op); + void print(const MachineOperand &Op, const TargetRegisterInfo *TRI, + unsigned I, bool ShouldPrintRegisterTies, bool IsDef = false); + void print(const MachineMemOperand &Op); + + void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI); +}; + +} // end namespace llvm + +namespace llvm { +namespace yaml { + +/// This struct serializes the LLVM IR module. +template <> struct BlockScalarTraits<Module> { + static void output(const Module &Mod, void *Ctxt, raw_ostream &OS) { + Mod.print(OS, nullptr); + } + static StringRef input(StringRef Str, void *Ctxt, Module &Mod) { + llvm_unreachable("LLVM Module is supposed to be parsed separately"); + return ""; + } +}; + +} // end namespace yaml +} // end namespace llvm + +static void printReg(unsigned Reg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + // TODO: Print Stack Slots. + if (!Reg) + OS << '_'; + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << '%' << TargetRegisterInfo::virtReg2Index(Reg); + else if (Reg < TRI->getNumRegs()) + OS << '%' << StringRef(TRI->getName(Reg)).lower(); + else + llvm_unreachable("Can't print this kind of register yet"); +} + +static void printReg(unsigned Reg, yaml::StringValue &Dest, + const TargetRegisterInfo *TRI) { + raw_string_ostream OS(Dest.Value); + printReg(Reg, OS, TRI); +} + +void MIRPrinter::print(const MachineFunction &MF) { + initRegisterMaskIds(MF); + + yaml::MachineFunction YamlMF; + YamlMF.Name = MF.getName(); + YamlMF.Alignment = MF.getAlignment(); + YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice(); + YamlMF.HasInlineAsm = MF.hasInlineAsm(); + convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo()); + ModuleSlotTracker MST(MF.getFunction()->getParent()); + MST.incorporateFunction(*MF.getFunction()); + convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo()); + convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST, + MF.getSubtarget().getRegisterInfo()); + if (const auto *ConstantPool = MF.getConstantPool()) + convert(YamlMF, *ConstantPool); + if (const auto *JumpTableInfo = MF.getJumpTableInfo()) + convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo); + raw_string_ostream StrOS(YamlMF.Body.Value.Value); + bool IsNewlineNeeded = false; + for (const auto &MBB : MF) { + if (IsNewlineNeeded) + StrOS << "\n"; + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .print(MBB); + IsNewlineNeeded = true; + } + StrOS.flush(); + yaml::Output Out(OS); + Out << YamlMF; +} + +void MIRPrinter::convert(yaml::MachineFunction &MF, + const MachineRegisterInfo &RegInfo, + const TargetRegisterInfo *TRI) { + MF.IsSSA = RegInfo.isSSA(); + MF.TracksRegLiveness = 
RegInfo.tracksLiveness(); + MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled(); + + // Print the virtual register definitions. + for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + yaml::VirtualRegisterDefinition VReg; + VReg.ID = I; + VReg.Class = + StringRef(TRI->getRegClassName(RegInfo.getRegClass(Reg))).lower(); + unsigned PreferredReg = RegInfo.getSimpleHint(Reg); + if (PreferredReg) + printReg(PreferredReg, VReg.PreferredRegister, TRI); + MF.VirtualRegisters.push_back(VReg); + } + + // Print the live ins. + for (auto I = RegInfo.livein_begin(), E = RegInfo.livein_end(); I != E; ++I) { + yaml::MachineFunctionLiveIn LiveIn; + printReg(I->first, LiveIn.Register, TRI); + if (I->second) + printReg(I->second, LiveIn.VirtualRegister, TRI); + MF.LiveIns.push_back(LiveIn); + } + // The used physical register mask is printed as an inverted callee saved + // register mask. + const BitVector &UsedPhysRegMask = RegInfo.getUsedPhysRegsMask(); + if (UsedPhysRegMask.none()) + return; + std::vector<yaml::FlowStringValue> CalleeSavedRegisters; + for (unsigned I = 0, E = UsedPhysRegMask.size(); I != E; ++I) { + if (!UsedPhysRegMask[I]) { + yaml::FlowStringValue Reg; + printReg(I, Reg, TRI); + CalleeSavedRegisters.push_back(Reg); + } + } + MF.CalleeSavedRegisters = CalleeSavedRegisters; +} + +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineFrameInfo &YamlMFI, + const MachineFrameInfo &MFI) { + YamlMFI.IsFrameAddressTaken = MFI.isFrameAddressTaken(); + YamlMFI.IsReturnAddressTaken = MFI.isReturnAddressTaken(); + YamlMFI.HasStackMap = MFI.hasStackMap(); + YamlMFI.HasPatchPoint = MFI.hasPatchPoint(); + YamlMFI.StackSize = MFI.getStackSize(); + YamlMFI.OffsetAdjustment = MFI.getOffsetAdjustment(); + YamlMFI.MaxAlignment = MFI.getMaxAlignment(); + YamlMFI.AdjustsStack = MFI.adjustsStack(); + YamlMFI.HasCalls = MFI.hasCalls(); + YamlMFI.MaxCallFrameSize = MFI.getMaxCallFrameSize(); + YamlMFI.HasOpaqueSPAdjustment = MFI.hasOpaqueSPAdjustment(); + YamlMFI.HasVAStart = MFI.hasVAStart(); + YamlMFI.HasMustTailInVarArgFunc = MFI.hasMustTailInVarArgFunc(); + if (MFI.getSavePoint()) { + raw_string_ostream StrOS(YamlMFI.SavePoint.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MFI.getSavePoint()); + } + if (MFI.getRestorePoint()) { + raw_string_ostream StrOS(YamlMFI.RestorePoint.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MFI.getRestorePoint()); + } +} + +void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF, + const MachineFrameInfo &MFI, + MachineModuleInfo &MMI, + ModuleSlotTracker &MST, + const TargetRegisterInfo *TRI) { + // Process fixed stack objects. + unsigned ID = 0; + for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::FixedMachineStackObject YamlObject; + YamlObject.ID = ID; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::FixedMachineStackObject::SpillSlot + : yaml::FixedMachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I); + YamlObject.IsAliased = MFI.isAliasedObjectIndex(I); + MF.FixedStackObjects.push_back(YamlObject); + StackObjectOperandMapping.insert( + std::make_pair(I, FrameIndexOperand::createFixed(ID++))); + } + + // Process ordinary stack objects. 
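+  // For illustration only (field set abridged; the exact keys are defined by
+  // MIRYamlMapping.h): each object converted below ends up serialized as a
+  // YAML entry along the lines of
+  //   - { id: 0, name: buf, offset: -16, size: 8, alignment: 8 }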
+ ID = 0; + for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) { + if (MFI.isDeadObjectIndex(I)) + continue; + + yaml::MachineStackObject YamlObject; + YamlObject.ID = ID; + if (const auto *Alloca = MFI.getObjectAllocation(I)) + YamlObject.Name.Value = + Alloca->hasName() ? Alloca->getName() : "<unnamed alloca>"; + YamlObject.Type = MFI.isSpillSlotObjectIndex(I) + ? yaml::MachineStackObject::SpillSlot + : MFI.isVariableSizedObjectIndex(I) + ? yaml::MachineStackObject::VariableSized + : yaml::MachineStackObject::DefaultType; + YamlObject.Offset = MFI.getObjectOffset(I); + YamlObject.Size = MFI.getObjectSize(I); + YamlObject.Alignment = MFI.getObjectAlignment(I); + + MF.StackObjects.push_back(YamlObject); + StackObjectOperandMapping.insert(std::make_pair( + I, FrameIndexOperand::create(YamlObject.Name.Value, ID++))); + } + + for (const auto &CSInfo : MFI.getCalleeSavedInfo()) { + yaml::StringValue Reg; + printReg(CSInfo.getReg(), Reg, TRI); + auto StackObjectInfo = StackObjectOperandMapping.find(CSInfo.getFrameIdx()); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + if (StackObject.IsFixed) + MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg; + else + MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg; + } + for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) { + auto LocalObject = MFI.getLocalFrameObjectMap(I); + auto StackObjectInfo = StackObjectOperandMapping.find(LocalObject.first); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + assert(!StackObject.IsFixed && "Expected a locally mapped stack object"); + MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second; + } + + // Print the stack object references in the frame information class after + // converting the stack objects. + if (MFI.hasStackProtectorIndex()) { + raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printStackObjectReference(MFI.getStackProtectorIndex()); + } + + // Print the debug variable information. + for (MachineModuleInfo::VariableDbgInfo &DebugVar : + MMI.getVariableDbgInfo()) { + auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot); + assert(StackObjectInfo != StackObjectOperandMapping.end() && + "Invalid stack object index"); + const FrameIndexOperand &StackObject = StackObjectInfo->second; + assert(!StackObject.IsFixed && "Expected a non-fixed stack object"); + auto &Object = MF.StackObjects[StackObject.ID]; + { + raw_string_ostream StrOS(Object.DebugVar.Value); + DebugVar.Var->printAsOperand(StrOS, MST); + } + { + raw_string_ostream StrOS(Object.DebugExpr.Value); + DebugVar.Expr->printAsOperand(StrOS, MST); + } + { + raw_string_ostream StrOS(Object.DebugLoc.Value); + DebugVar.Loc->printAsOperand(StrOS, MST); + } + } +} + +void MIRPrinter::convert(yaml::MachineFunction &MF, + const MachineConstantPool &ConstantPool) { + unsigned ID = 0; + for (const MachineConstantPoolEntry &Constant : ConstantPool.getConstants()) { + // TODO: Serialize target specific constant pool entries. 
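+    // For illustration only (assumed shape, keys per MIRYamlMapping.h): a
+    // serialized constant pool entry looks roughly like
+    //   - id: 0
+    //     value: 'float 1.000000e+00'
+    //     alignment: 4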
+ if (Constant.isMachineConstantPoolEntry()) + llvm_unreachable("Can't print target specific constant pool entries yet"); + + yaml::MachineConstantPoolValue YamlConstant; + std::string Str; + raw_string_ostream StrOS(Str); + Constant.Val.ConstVal->printAsOperand(StrOS); + YamlConstant.ID = ID++; + YamlConstant.Value = StrOS.str(); + YamlConstant.Alignment = Constant.getAlignment(); + MF.Constants.push_back(YamlConstant); + } +} + +void MIRPrinter::convert(ModuleSlotTracker &MST, + yaml::MachineJumpTable &YamlJTI, + const MachineJumpTableInfo &JTI) { + YamlJTI.Kind = JTI.getEntryKind(); + unsigned ID = 0; + for (const auto &Table : JTI.getJumpTables()) { + std::string Str; + yaml::MachineJumpTable::Entry Entry; + Entry.ID = ID++; + for (const auto *MBB : Table.MBBs) { + raw_string_ostream StrOS(Str); + MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping) + .printMBBReference(*MBB); + Entry.Blocks.push_back(StrOS.str()); + Str.clear(); + } + YamlJTI.Entries.push_back(Entry); + } +} + +void MIRPrinter::initRegisterMaskIds(const MachineFunction &MF) { + const auto *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned I = 0; + for (const uint32_t *Mask : TRI->getRegMasks()) + RegisterMaskIds.insert(std::make_pair(Mask, I++)); +} + +void MIPrinter::print(const MachineBasicBlock &MBB) { + assert(MBB.getNumber() >= 0 && "Invalid MBB number"); + OS << "bb." << MBB.getNumber(); + bool HasAttributes = false; + if (const auto *BB = MBB.getBasicBlock()) { + if (BB->hasName()) { + OS << "." << BB->getName(); + } else { + HasAttributes = true; + OS << " ("; + int Slot = MST.getLocalSlot(BB); + if (Slot == -1) + OS << "<ir-block badref>"; + else + OS << (Twine("%ir-block.") + Twine(Slot)).str(); + } + } + if (MBB.hasAddressTaken()) { + OS << (HasAttributes ? ", " : " ("); + OS << "address-taken"; + HasAttributes = true; + } + if (MBB.isEHPad()) { + OS << (HasAttributes ? ", " : " ("); + OS << "landing-pad"; + HasAttributes = true; + } + if (MBB.getAlignment()) { + OS << (HasAttributes ? ", " : " ("); + OS << "align " << MBB.getAlignment(); + HasAttributes = true; + } + if (HasAttributes) + OS << ")"; + OS << ":\n"; + + bool HasLineAttributes = false; + // Print the successors + if (!MBB.succ_empty()) { + OS.indent(2) << "successors: "; + for (auto I = MBB.succ_begin(), E = MBB.succ_end(); I != E; ++I) { + if (I != MBB.succ_begin()) + OS << ", "; + printMBBReference(**I); + if (MBB.hasSuccessorProbabilities()) + OS << '(' << MBB.getSuccProbability(I) << ')'; + } + OS << "\n"; + HasLineAttributes = true; + } + + // Print the live in registers. + const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); + assert(TRI && "Expected target register info"); + if (!MBB.livein_empty()) { + OS.indent(2) << "liveins: "; + bool First = true; + for (const auto &LI : MBB.liveins()) { + if (!First) + OS << ", "; + First = false; + printReg(LI.PhysReg, OS, TRI); + if (LI.LaneMask != ~0u) + OS << ':' << PrintLaneMask(LI.LaneMask); + } + OS << "\n"; + HasLineAttributes = true; + } + + if (HasLineAttributes) + OS << "\n"; + bool IsInBundle = false; + for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; ++I) { + const MachineInstr &MI = *I; + if (IsInBundle && !MI.isInsideBundle()) { + OS.indent(2) << "}\n"; + IsInBundle = false; + } + OS.indent(IsInBundle ? 
4 : 2); + print(MI); + if (!IsInBundle && MI.getFlag(MachineInstr::BundledSucc)) { + OS << " {"; + IsInBundle = true; + } + OS << "\n"; + } + if (IsInBundle) + OS.indent(2) << "}\n"; +} + +/// Return true when an instruction has tied register that can't be determined +/// by the instruction's descriptor. +static bool hasComplexRegisterTies(const MachineInstr &MI) { + const MCInstrDesc &MCID = MI.getDesc(); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const auto &Operand = MI.getOperand(I); + if (!Operand.isReg() || Operand.isDef()) + // Ignore the defined registers as MCID marks only the uses as tied. + continue; + int ExpectedTiedIdx = MCID.getOperandConstraint(I, MCOI::TIED_TO); + int TiedIdx = Operand.isTied() ? int(MI.findTiedOperandIdx(I)) : -1; + if (ExpectedTiedIdx != TiedIdx) + return true; + } + return false; +} + +void MIPrinter::print(const MachineInstr &MI) { + const auto &SubTarget = MI.getParent()->getParent()->getSubtarget(); + const auto *TRI = SubTarget.getRegisterInfo(); + assert(TRI && "Expected target register info"); + const auto *TII = SubTarget.getInstrInfo(); + assert(TII && "Expected target instruction info"); + if (MI.isCFIInstruction()) + assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction"); + + bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI); + unsigned I = 0, E = MI.getNumOperands(); + for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() && + !MI.getOperand(I).isImplicit(); + ++I) { + if (I) + OS << ", "; + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, /*IsDef=*/true); + } + + if (I) + OS << " = "; + if (MI.getFlag(MachineInstr::FrameSetup)) + OS << "frame-setup "; + OS << TII->getName(MI.getOpcode()); + if (I < E) + OS << ' '; + + bool NeedComma = false; + for (; I < E; ++I) { + if (NeedComma) + OS << ", "; + print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies); + NeedComma = true; + } + + if (MI.getDebugLoc()) { + if (NeedComma) + OS << ','; + OS << " debug-location "; + MI.getDebugLoc()->printAsOperand(OS, MST); + } + + if (!MI.memoperands_empty()) { + OS << " :: "; + bool NeedComma = false; + for (const auto *Op : MI.memoperands()) { + if (NeedComma) + OS << ", "; + print(*Op); + NeedComma = true; + } + } +} + +void MIPrinter::printMBBReference(const MachineBasicBlock &MBB) { + OS << "%bb." << MBB.getNumber(); + if (const auto *BB = MBB.getBasicBlock()) { + if (BB->hasName()) + OS << '.' << BB->getName(); + } +} + +static void printIRSlotNumber(raw_ostream &OS, int Slot) { + if (Slot == -1) + OS << "<badref>"; + else + OS << Slot; +} + +void MIPrinter::printIRBlockReference(const BasicBlock &BB) { + OS << "%ir-block."; + if (BB.hasName()) { + printLLVMNameWithoutPrefix(OS, BB.getName()); + return; + } + const Function *F = BB.getParent(); + int Slot; + if (F == MST.getCurrentFunction()) { + Slot = MST.getLocalSlot(&BB); + } else { + ModuleSlotTracker CustomMST(F->getParent(), + /*ShouldInitializeAllMetadata=*/false); + CustomMST.incorporateFunction(*F); + Slot = CustomMST.getLocalSlot(&BB); + } + printIRSlotNumber(OS, Slot); +} + +void MIPrinter::printIRValueReference(const Value &V) { + if (isa<GlobalValue>(V)) { + V.printAsOperand(OS, /*PrintType=*/false, MST); + return; + } + if (isa<Constant>(V)) { + // Machine memory operands can load/store to/from constant value pointers. 
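+    // Illustrative output only: such a constant is printed back-quoted, e.g.
+    //   `i8* getelementptr inbounds ([4 x i8], [4 x i8]* @str, i64 0, i64 0)`
+    // (the exact text is whatever printAsOperand produces for the constant).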
+ OS << '`'; + V.printAsOperand(OS, /*PrintType=*/true, MST); + OS << '`'; + return; + } + OS << "%ir."; + if (V.hasName()) { + printLLVMNameWithoutPrefix(OS, V.getName()); + return; + } + printIRSlotNumber(OS, MST.getLocalSlot(&V)); +} + +void MIPrinter::printStackObjectReference(int FrameIndex) { + auto ObjectInfo = StackObjectOperandMapping.find(FrameIndex); + assert(ObjectInfo != StackObjectOperandMapping.end() && + "Invalid frame index"); + const FrameIndexOperand &Operand = ObjectInfo->second; + if (Operand.IsFixed) { + OS << "%fixed-stack." << Operand.ID; + return; + } + OS << "%stack." << Operand.ID; + if (!Operand.Name.empty()) + OS << '.' << Operand.Name; +} + +void MIPrinter::printOffset(int64_t Offset) { + if (Offset == 0) + return; + if (Offset < 0) { + OS << " - " << -Offset; + return; + } + OS << " + " << Offset; +} + +static const char *getTargetFlagName(const TargetInstrInfo *TII, unsigned TF) { + auto Flags = TII->getSerializableDirectMachineOperandTargetFlags(); + for (const auto &I : Flags) { + if (I.first == TF) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::printTargetFlags(const MachineOperand &Op) { + if (!Op.getTargetFlags()) + return; + const auto *TII = + Op.getParent()->getParent()->getParent()->getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Flags = TII->decomposeMachineOperandsTargetFlags(Op.getTargetFlags()); + OS << "target-flags("; + const bool HasDirectFlags = Flags.first; + const bool HasBitmaskFlags = Flags.second; + if (!HasDirectFlags && !HasBitmaskFlags) { + OS << "<unknown>) "; + return; + } + if (HasDirectFlags) { + if (const auto *Name = getTargetFlagName(TII, Flags.first)) + OS << Name; + else + OS << "<unknown target flag>"; + } + if (!HasBitmaskFlags) { + OS << ") "; + return; + } + bool IsCommaNeeded = HasDirectFlags; + unsigned BitMask = Flags.second; + auto BitMasks = TII->getSerializableBitmaskMachineOperandTargetFlags(); + for (const auto &Mask : BitMasks) { + // Check if the flag's bitmask has the bits of the current mask set. + if ((BitMask & Mask.first) == Mask.first) { + if (IsCommaNeeded) + OS << ", "; + IsCommaNeeded = true; + OS << Mask.second; + // Clear the bits which were serialized from the flag's bitmask. + BitMask &= ~(Mask.first); + } + } + if (BitMask) { + // When the resulting flag's bitmask isn't zero, we know that we didn't + // serialize all of the bit flags. + if (IsCommaNeeded) + OS << ", "; + OS << "<unknown bitmask target flag>"; + } + OS << ") "; +} + +static const char *getTargetIndexName(const MachineFunction &MF, int Index) { + const auto *TII = MF.getSubtarget().getInstrInfo(); + assert(TII && "expected instruction info"); + auto Indices = TII->getSerializableTargetIndices(); + for (const auto &I : Indices) { + if (I.first == Index) { + return I.second; + } + } + return nullptr; +} + +void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI, + unsigned I, bool ShouldPrintRegisterTies, bool IsDef) { + printTargetFlags(Op); + switch (Op.getType()) { + case MachineOperand::MO_Register: + if (Op.isImplicit()) + OS << (Op.isDef() ? "implicit-def " : "implicit "); + else if (!IsDef && Op.isDef()) + // Print the 'def' flag only when the operand is defined after '='. 
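+      // (Leading explicit defs are printed before the '=' with IsDef set, so
+      // only defs that appear in the trailing operand list get this prefix.)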
+ OS << "def "; + if (Op.isInternalRead()) + OS << "internal "; + if (Op.isDead()) + OS << "dead "; + if (Op.isKill()) + OS << "killed "; + if (Op.isUndef()) + OS << "undef "; + if (Op.isEarlyClobber()) + OS << "early-clobber "; + if (Op.isDebug()) + OS << "debug-use "; + printReg(Op.getReg(), OS, TRI); + // Print the sub register. + if (Op.getSubReg() != 0) + OS << ':' << TRI->getSubRegIndexName(Op.getSubReg()); + if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef()) + OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")"; + break; + case MachineOperand::MO_Immediate: + OS << Op.getImm(); + break; + case MachineOperand::MO_CImmediate: + Op.getCImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_FPImmediate: + Op.getFPImm()->printAsOperand(OS, /*PrintType=*/true, MST); + break; + case MachineOperand::MO_MachineBasicBlock: + printMBBReference(*Op.getMBB()); + break; + case MachineOperand::MO_FrameIndex: + printStackObjectReference(Op.getIndex()); + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "%const." << Op.getIndex(); + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_TargetIndex: { + OS << "target-index("; + if (const auto *Name = getTargetIndexName( + *Op.getParent()->getParent()->getParent(), Op.getIndex())) + OS << Name; + else + OS << "<unknown>"; + OS << ')'; + printOffset(Op.getOffset()); + break; + } + case MachineOperand::MO_JumpTableIndex: + OS << "%jump-table." << Op.getIndex(); + break; + case MachineOperand::MO_ExternalSymbol: + OS << '$'; + printLLVMNameWithoutPrefix(OS, Op.getSymbolName()); + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_GlobalAddress: + Op.getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_BlockAddress: + OS << "blockaddress("; + Op.getBlockAddress()->getFunction()->printAsOperand(OS, /*PrintType=*/false, + MST); + OS << ", "; + printIRBlockReference(*Op.getBlockAddress()->getBasicBlock()); + OS << ')'; + printOffset(Op.getOffset()); + break; + case MachineOperand::MO_RegisterMask: { + auto RegMaskInfo = RegisterMaskIds.find(Op.getRegMask()); + if (RegMaskInfo != RegisterMaskIds.end()) + OS << StringRef(TRI->getRegMaskNames()[RegMaskInfo->second]).lower(); + else + llvm_unreachable("Can't print this machine register mask yet."); + break; + } + case MachineOperand::MO_RegisterLiveOut: { + const uint32_t *RegMask = Op.getRegLiveOut(); + OS << "liveout("; + bool IsCommaNeeded = false; + for (unsigned Reg = 0, E = TRI->getNumRegs(); Reg < E; ++Reg) { + if (RegMask[Reg / 32] & (1U << (Reg % 32))) { + if (IsCommaNeeded) + OS << ", "; + printReg(Reg, OS, TRI); + IsCommaNeeded = true; + } + } + OS << ")"; + break; + } + case MachineOperand::MO_Metadata: + Op.getMetadata()->printAsOperand(OS, MST); + break; + case MachineOperand::MO_MCSymbol: + OS << "<mcsymbol " << *Op.getMCSymbol() << ">"; + break; + case MachineOperand::MO_CFIIndex: { + const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI(); + print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI); + break; + } + } +} + +void MIPrinter::print(const MachineMemOperand &Op) { + OS << '('; + // TODO: Print operand's target specific flags. 
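+  // For illustration only: the memory operand syntax produced below looks
+  // roughly like
+  //   (volatile load 4 from %ir.p, align 8, !tbaa !2)
+  // with the flags, size, value or pseudo value, offset, alignment and AA
+  // metadata appended in that order.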
+ if (Op.isVolatile()) + OS << "volatile "; + if (Op.isNonTemporal()) + OS << "non-temporal "; + if (Op.isInvariant()) + OS << "invariant "; + if (Op.isLoad()) + OS << "load "; + else { + assert(Op.isStore() && "Non load machine operand must be a store"); + OS << "store "; + } + OS << Op.getSize() << (Op.isLoad() ? " from " : " into "); + if (const Value *Val = Op.getValue()) { + printIRValueReference(*Val); + } else { + const PseudoSourceValue *PVal = Op.getPseudoValue(); + assert(PVal && "Expected a pseudo source value"); + switch (PVal->kind()) { + case PseudoSourceValue::Stack: + OS << "stack"; + break; + case PseudoSourceValue::GOT: + OS << "got"; + break; + case PseudoSourceValue::JumpTable: + OS << "jump-table"; + break; + case PseudoSourceValue::ConstantPool: + OS << "constant-pool"; + break; + case PseudoSourceValue::FixedStack: + printStackObjectReference( + cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex()); + break; + case PseudoSourceValue::GlobalValueCallEntry: + OS << "call-entry "; + cast<GlobalValuePseudoSourceValue>(PVal)->getValue()->printAsOperand( + OS, /*PrintType=*/false, MST); + break; + case PseudoSourceValue::ExternalSymbolCallEntry: + OS << "call-entry $"; + printLLVMNameWithoutPrefix( + OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol()); + break; + } + } + printOffset(Op.getOffset()); + if (Op.getBaseAlignment() != Op.getSize()) + OS << ", align " << Op.getBaseAlignment(); + auto AAInfo = Op.getAAInfo(); + if (AAInfo.TBAA) { + OS << ", !tbaa "; + AAInfo.TBAA->printAsOperand(OS, MST); + } + if (AAInfo.Scope) { + OS << ", !alias.scope "; + AAInfo.Scope->printAsOperand(OS, MST); + } + if (AAInfo.NoAlias) { + OS << ", !noalias "; + AAInfo.NoAlias->printAsOperand(OS, MST); + } + if (Op.getRanges()) { + OS << ", !range "; + Op.getRanges()->printAsOperand(OS, MST); + } + OS << ')'; +} + +static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS, + const TargetRegisterInfo *TRI) { + int Reg = TRI->getLLVMRegNum(DwarfReg, true); + if (Reg == -1) { + OS << "<badreg>"; + return; + } + printReg(Reg, OS, TRI); +} + +void MIPrinter::print(const MCCFIInstruction &CFI, + const TargetRegisterInfo *TRI) { + switch (CFI.getOperation()) { + case MCCFIInstruction::OpSameValue: + OS << ".cfi_same_value "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpOffset: + OS << ".cfi_offset "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfaRegister: + OS << ".cfi_def_cfa_register "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + break; + case MCCFIInstruction::OpDefCfaOffset: + OS << ".cfi_def_cfa_offset "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + OS << CFI.getOffset(); + break; + case MCCFIInstruction::OpDefCfa: + OS << ".cfi_def_cfa "; + if (CFI.getLabel()) + OS << "<mcsymbol> "; + printCFIRegister(CFI.getRegister(), OS, TRI); + OS << ", " << CFI.getOffset(); + break; + default: + // TODO: Print the other CFI Operations. 
+ OS << "<unserializable cfi operation>"; + break; + } +} + +void llvm::printMIR(raw_ostream &OS, const Module &M) { + yaml::Output Out(OS); + Out << const_cast<Module &>(M); +} + +void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) { + MIRPrinter Printer(OS); + Printer.print(MF); +} diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.h b/contrib/llvm/lib/CodeGen/MIRPrinter.h new file mode 100644 index 0000000..16aa903 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRPrinter.h @@ -0,0 +1,33 @@ +//===- MIRPrinter.h - MIR serialization format printer --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares the functions that print out the LLVM IR and the machine +// functions using the MIR serialization format. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_MIRPRINTER_H +#define LLVM_LIB_CODEGEN_MIRPRINTER_H + +namespace llvm { + +class MachineFunction; +class Module; +class raw_ostream; + +/// Print LLVM IR using the MIR serialization format to the given output stream. +void printMIR(raw_ostream &OS, const Module &M); + +/// Print a machine function using the MIR serialization format to the given +/// output stream. +void printMIR(raw_ostream &OS, const MachineFunction &MF); + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp new file mode 100644 index 0000000..8e7566a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp @@ -0,0 +1,71 @@ +//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass that prints out the LLVM module using the MIR +// serialization format. +// +//===----------------------------------------------------------------------===// + +#include "MIRPrinter.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MIRYamlMapping.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +/// This pass prints out the LLVM IR to an output stream using the MIR +/// serialization format. 
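+///
+/// A typical (purely illustrative) way to schedule it is near the end of the
+/// codegen pipeline, e.g.
+///
+///   PM.add(createPrintMIRPass(errs()));
+///
+/// each runOnMachineFunction() call appends the printed function to a buffer,
+/// and doFinalization() emits the IR module followed by the buffered output.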
+struct MIRPrintingPass : public MachineFunctionPass { + static char ID; + raw_ostream &OS; + std::string MachineFunctions; + + MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {} + MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {} + + const char *getPassName() const override { return "MIR Printing Pass"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + std::string Str; + raw_string_ostream StrOS(Str); + printMIR(StrOS, MF); + MachineFunctions.append(StrOS.str()); + return false; + } + + bool doFinalization(Module &M) override { + printMIR(OS, M); + OS << MachineFunctions; + return false; + } +}; + +char MIRPrintingPass::ID = 0; + +} // end anonymous namespace + +char &llvm::MIRPrintingPassID = MIRPrintingPass::ID; +INITIALIZE_PASS(MIRPrintingPass, "mir-printer", "MIR Printer", false, false) + +namespace llvm { + +MachineFunctionPass *createPrintMIRPass(raw_ostream &OS) { + return new MIRPrintingPass(OS); +} + +} // end namespace llvm diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp new file mode 100644 index 0000000..76099f2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -0,0 +1,1268 @@ +//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect the sequence of machine instructions for a basic block. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "codegen" + +MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B) + : BB(B), Number(-1), xParent(&MF) { + Insts.Parent = this; +} + +MachineBasicBlock::~MachineBasicBlock() { +} + +/// Return the MCSymbol for this basic block. 
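+///
+/// For example (illustrative), with a private label prefix of ".L" the symbol
+/// for block 2 of function 1 is named ".LBB1_2".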
+MCSymbol *MachineBasicBlock::getSymbol() const { + if (!CachedMCSymbol) { + const MachineFunction *MF = getParent(); + MCContext &Ctx = MF->getContext(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix(); + assert(getNumber() >= 0 && "cannot get label for unreachable MBB"); + CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" + + Twine(MF->getFunctionNumber()) + + "_" + Twine(getNumber())); + } + + return CachedMCSymbol; +} + + +raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) { + MBB.print(OS); + return OS; +} + +/// When an MBB is added to an MF, we need to update the parent pointer of the +/// MBB, the MBB numbering, and any instructions in the MBB to be on the right +/// operand list for registers. +/// +/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it +/// gets the next available unique MBB number. If it is removed from a +/// MachineFunction, it goes back to being #-1. +void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) { + MachineFunction &MF = *N->getParent(); + N->Number = MF.addToMBBNumbering(N); + + // Make sure the instructions have their operands in the reginfo lists. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (MachineBasicBlock::instr_iterator + I = N->instr_begin(), E = N->instr_end(); I != E; ++I) + I->AddRegOperandsToUseLists(RegInfo); +} + +void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) { + N->getParent()->removeFromMBBNumbering(N->Number); + N->Number = -1; +} + +/// When we add an instruction to a basic block list, we update its parent +/// pointer and add its operands from reg use/def lists if appropriate. +void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) { + assert(!N->getParent() && "machine instruction already in a basic block"); + N->setParent(Parent); + + // Add the instruction's register operands to their corresponding + // use/def lists. + MachineFunction *MF = Parent->getParent(); + N->AddRegOperandsToUseLists(MF->getRegInfo()); +} + +/// When we remove an instruction from a basic block list, we update its parent +/// pointer and remove its operands from reg use/def lists if appropriate. +void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) { + assert(N->getParent() && "machine instruction not in a basic block"); + + // Remove from the use/def lists. + if (MachineFunction *MF = N->getParent()->getParent()) + N->RemoveRegOperandsFromUseLists(MF->getRegInfo()); + + N->setParent(nullptr); +} + +/// When moving a range of instructions from one MBB list to another, we need to +/// update the parent pointers and the use/def lists. +void ilist_traits<MachineInstr>:: +transferNodesFromList(ilist_traits<MachineInstr> &FromList, + ilist_iterator<MachineInstr> First, + ilist_iterator<MachineInstr> Last) { + assert(Parent->getParent() == FromList.Parent->getParent() && + "MachineInstr parent mismatch!"); + + // Splice within the same MBB -> no change. + if (Parent == FromList.Parent) return; + + // If splicing between two blocks within the same function, just update the + // parent pointers. 
+ for (; First != Last; ++First) + First->setParent(Parent); +} + +void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) { + assert(!MI->getParent() && "MI is still in a block!"); + Parent->getParent()->DeleteMachineInstr(MI); +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + instr_iterator I = instr_begin(), E = instr_end(); + while (I != E && I->isPHI()) + ++I; + assert((I == E || !I->isInsideBundle()) && + "First non-phi MI cannot be inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator +MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) { + iterator E = end(); + while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue())) + ++I; + // FIXME: This needs to change if we wish to bundle labels / dbg_values + // inside the bundle. + assert((I == E || !I->isInsideBundle()) && + "First non-phi / non-label instruction is inside a bundle!"); + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { + iterator B = begin(), E = end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() { + instr_iterator B = instr_begin(), E = instr_end(), I = E; + while (I != B && ((--I)->isTerminator() || I->isDebugValue())) + ; /*noop */ + while (I != E && !I->isTerminator()) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() { + // Skip over begin-of-block dbg_value instructions. + iterator I = begin(), E = end(); + while (I != E && I->isDebugValue()) + ++I; + return I; +} + +MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() { + // Skip over end-of-block dbg_value instructions. + instr_iterator B = instr_begin(), I = instr_end(); + while (I != B) { + --I; + // Return instruction that starts a bundle. + if (I->isDebugValue() || I->isInsideBundle()) + continue; + return I; + } + // The block is all debug values. + return end(); +} + +const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const { + // A block with a landing pad successor only has one other successor. + if (succ_size() > 2) + return nullptr; + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isEHPad()) + return *I; + return nullptr; +} + +bool MachineBasicBlock::hasEHPadSuccessor() const { + for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I) + if ((*I)->isEHPad()) + return true; + return false; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineBasicBlock::dump() const { + print(dbgs()); +} +#endif + +StringRef MachineBasicBlock::getName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->getName(); + else + return "(null)"; +} + +/// Return a hopefully unique identifier for this block. +std::string MachineBasicBlock::getFullName() const { + std::string Name; + if (getParent()) + Name = (getParent()->getName() + ":").str(); + if (getBasicBlock()) + Name += getBasicBlock()->getName(); + else + Name += ("BB" + Twine(getNumber())).str(); + return Name; +} + +void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { + const MachineFunction *MF = getParent(); + if (!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + const Function *F = MF->getFunction(); + const Module *M = F ? 
F->getParent() : nullptr; + ModuleSlotTracker MST(M); + print(OS, MST, Indexes); +} + +void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST, + SlotIndexes *Indexes) const { + const MachineFunction *MF = getParent(); + if (!MF) { + OS << "Can't print out MachineBasicBlock because parent MachineFunction" + << " is null\n"; + return; + } + + if (Indexes) + OS << Indexes->getMBBStartIdx(this) << '\t'; + + OS << "BB#" << getNumber() << ": "; + + const char *Comma = ""; + if (const BasicBlock *LBB = getBasicBlock()) { + OS << Comma << "derived from LLVM BB "; + LBB->printAsOperand(OS, /*PrintType=*/false, MST); + Comma = ", "; + } + if (isEHPad()) { OS << Comma << "EH LANDING PAD"; Comma = ", "; } + if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; } + if (Alignment) + OS << Comma << "Align " << Alignment << " (" << (1u << Alignment) + << " bytes)"; + + OS << '\n'; + + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + if (!livein_empty()) { + if (Indexes) OS << '\t'; + OS << " Live Ins:"; + for (const auto &LI : make_range(livein_begin(), livein_end())) { + OS << ' ' << PrintReg(LI.PhysReg, TRI); + if (LI.LaneMask != ~0u) + OS << ':' << PrintLaneMask(LI.LaneMask); + } + OS << '\n'; + } + // Print the preds of this block according to the CFG. + if (!pred_empty()) { + if (Indexes) OS << '\t'; + OS << " Predecessors according to CFG:"; + for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI) + OS << " BB#" << (*PI)->getNumber(); + OS << '\n'; + } + + for (const_instr_iterator I = instr_begin(); I != instr_end(); ++I) { + if (Indexes) { + if (Indexes->hasIndex(&*I)) + OS << Indexes->getInstructionIndex(&*I); + OS << '\t'; + } + OS << '\t'; + if (I->isInsideBundle()) + OS << " * "; + I->print(OS, MST); + } + + // Print the successors of this block according to the CFG. + if (!succ_empty()) { + if (Indexes) OS << '\t'; + OS << " Successors according to CFG:"; + for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI) { + OS << " BB#" << (*SI)->getNumber(); + if (!Probs.empty()) + OS << '(' << *getProbabilityIterator(SI) << ')'; + } + OS << '\n'; + } +} + +void MachineBasicBlock::printAsOperand(raw_ostream &OS, + bool /*PrintType*/) const { + OS << "BB#" << getNumber(); +} + +void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) { + LiveInVector::iterator I = std::find_if( + LiveIns.begin(), LiveIns.end(), + [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + if (I == LiveIns.end()) + return; + + I->LaneMask &= ~LaneMask; + if (I->LaneMask == 0) + LiveIns.erase(I); +} + +bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const { + livein_iterator I = std::find_if( + LiveIns.begin(), LiveIns.end(), + [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; }); + return I != livein_end() && (I->LaneMask & LaneMask) != 0; +} + +void MachineBasicBlock::sortUniqueLiveIns() { + std::sort(LiveIns.begin(), LiveIns.end(), + [](const RegisterMaskPair &LI0, const RegisterMaskPair &LI1) { + return LI0.PhysReg < LI1.PhysReg; + }); + // Liveins are sorted by physreg now we can merge their lanemasks. 
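+  // Illustrative example (register names are placeholders): entries
+  // (R0,0x1), (R0,0x2), (R1,0x1) are compacted in place to (R0,0x3), (R1,0x1)
+  // by OR-ing the lane masks of duplicate registers.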
+ LiveInVector::const_iterator I = LiveIns.begin(); + LiveInVector::const_iterator J; + LiveInVector::iterator Out = LiveIns.begin(); + for (; I != LiveIns.end(); ++Out, I = J) { + unsigned PhysReg = I->PhysReg; + LaneBitmask LaneMask = I->LaneMask; + for (J = std::next(I); J != LiveIns.end() && J->PhysReg == PhysReg; ++J) + LaneMask |= J->LaneMask; + Out->PhysReg = PhysReg; + Out->LaneMask = LaneMask; + } + LiveIns.erase(Out, LiveIns.end()); +} + +unsigned +MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) { + assert(getParent() && "MBB must be inserted in function"); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg"); + assert(RC && "Register class is required"); + assert((isEHPad() || this == &getParent()->front()) && + "Only the entry block and landing pads can have physreg live ins"); + + bool LiveIn = isLiveIn(PhysReg); + iterator I = SkipPHIsAndLabels(begin()), E = end(); + MachineRegisterInfo &MRI = getParent()->getRegInfo(); + const TargetInstrInfo &TII = *getParent()->getSubtarget().getInstrInfo(); + + // Look for an existing copy. + if (LiveIn) + for (;I != E && I->isCopy(); ++I) + if (I->getOperand(1).getReg() == PhysReg) { + unsigned VirtReg = I->getOperand(0).getReg(); + if (!MRI.constrainRegClass(VirtReg, RC)) + llvm_unreachable("Incompatible live-in register class."); + return VirtReg; + } + + // No luck, create a virtual register. + unsigned VirtReg = MRI.createVirtualRegister(RC); + BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg) + .addReg(PhysReg, RegState::Kill); + if (!LiveIn) + addLiveIn(PhysReg); + return VirtReg; +} + +void MachineBasicBlock::moveBefore(MachineBasicBlock *NewAfter) { + getParent()->splice(NewAfter->getIterator(), getIterator()); +} + +void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) { + getParent()->splice(++NewBefore->getIterator(), getIterator()); +} + +void MachineBasicBlock::updateTerminator() { + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + // A block with no successors has no concerns with fall-through edges. + if (this->succ_empty()) return; + + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + DebugLoc DL; // FIXME: this is nowhere + bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); + (void) B; + assert(!B && "UpdateTerminators requires analyzable predecessors!"); + if (Cond.empty()) { + if (TBB) { + // The block has an unconditional branch. If its successor is now + // its layout successor, delete the branch. + if (isLayoutSuccessor(TBB)) + TII->RemoveBranch(*this); + } else { + // The block has an unconditional fallthrough. If its successor is not + // its layout successor, insert a branch. First we have to locate the + // only non-landing-pad successor, as that is the fallthrough block. + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isEHPad()) + continue; + assert(!TBB && "Found more than one non-landing-pad successor!"); + TBB = *SI; + } + + // If there is no non-landing-pad successor, the block has no + // fall-through edges to be concerned with. + if (!TBB) + return; + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + } + } else { + if (FBB) { + // The block has a non-fallthrough conditional branch. 
If one of its + // successors is its layout successor, rewrite it to a fallthrough + // conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) + return; + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FBB, nullptr, Cond, DL); + } else if (isLayoutSuccessor(FBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + } + } else { + // Walk through the successors and find the successor which is not + // a landing pad and is not the conditional branch destination (in TBB) + // as the fallthrough successor. + MachineBasicBlock *FallthroughBB = nullptr; + for (succ_iterator SI = succ_begin(), SE = succ_end(); SI != SE; ++SI) { + if ((*SI)->isEHPad() || *SI == TBB) + continue; + assert(!FallthroughBB && "Found more than one fallthrough successor."); + FallthroughBB = *SI; + } + if (!FallthroughBB && canFallThrough()) { + // We fallthrough to the same basic block as the conditional jump + // targets. Remove the conditional jump, leaving unconditional + // fallthrough. + // FIXME: This does not seem like a reasonable pattern to support, but + // it has been seen in the wild coming out of degenerate ARM test cases. + TII->RemoveBranch(*this); + + // Finally update the unconditional successor to be reached via a branch + // if it would not be reached by fallthrough. + if (!isLayoutSuccessor(TBB)) + TII->InsertBranch(*this, TBB, nullptr, Cond, DL); + return; + } + + // The block has a fallthrough conditional branch. + if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) { + // We can't reverse the condition, add an unconditional branch. + Cond.clear(); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + return; + } + TII->RemoveBranch(*this); + TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL); + } else if (!isLayoutSuccessor(FallthroughBB)) { + TII->RemoveBranch(*this); + TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL); + } + } + } +} + +void MachineBasicBlock::validateSuccProbs() const { +#ifndef NDEBUG + int64_t Sum = 0; + for (auto Prob : Probs) + Sum += Prob.getNumerator(); + // Due to precision issue, we assume that the sum of probabilities is one if + // the difference between the sum of their numerators and the denominator is + // no greater than the number of successors. + assert((uint64_t)std::abs(Sum - BranchProbability::getDenominator()) <= + Probs.size() && + "The sum of successors's probabilities exceeds one."); +#endif // NDEBUG +} + +void MachineBasicBlock::addSuccessor(MachineBasicBlock *Succ, + BranchProbability Prob) { + // Probability list is either empty (if successor list isn't empty, this means + // disabled optimization) or has the same size as successor list. + if (!(Probs.empty() && !Successors.empty())) + Probs.push_back(Prob); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} + +void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) { + // We need to make sure probability list is either empty or has the same size + // of successor list. When this function is called, we can safely delete all + // probability in the list. 
+ Probs.clear(); + Successors.push_back(Succ); + Succ->addPredecessor(this); +} + +void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ, + bool NormalizeSuccProbs) { + succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ); + removeSuccessor(I, NormalizeSuccProbs); +} + +MachineBasicBlock::succ_iterator +MachineBasicBlock::removeSuccessor(succ_iterator I, bool NormalizeSuccProbs) { + assert(I != Successors.end() && "Not a current successor!"); + + // If probability list is empty it means we don't use it (disabled + // optimization). + if (!Probs.empty()) { + probability_iterator WI = getProbabilityIterator(I); + Probs.erase(WI); + if (NormalizeSuccProbs) + normalizeSuccProbs(); + } + + (*I)->removePredecessor(this); + return Successors.erase(I); +} + +void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old, + MachineBasicBlock *New) { + if (Old == New) + return; + + succ_iterator E = succ_end(); + succ_iterator NewI = E; + succ_iterator OldI = E; + for (succ_iterator I = succ_begin(); I != E; ++I) { + if (*I == Old) { + OldI = I; + if (NewI != E) + break; + } + if (*I == New) { + NewI = I; + if (OldI != E) + break; + } + } + assert(OldI != E && "Old is not a successor of this block"); + + // If New isn't already a successor, let it take Old's place. + if (NewI == E) { + Old->removePredecessor(this); + New->addPredecessor(this); + *OldI = New; + return; + } + + // New is already a successor. + // Update its probability instead of adding a duplicate edge. + if (!Probs.empty()) { + auto ProbIter = getProbabilityIterator(NewI); + if (!ProbIter->isUnknown()) + *ProbIter += *getProbabilityIterator(OldI); + } + removeSuccessor(OldI); +} + +void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) { + Predecessors.push_back(Pred); +} + +void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) { + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred); + assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); + Predecessors.erase(I); +} + +void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) { + if (this == FromMBB) + return; + + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); + + // If probability list is empty it means we don't use it (disabled optimization). + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); + + FromMBB->removeSuccessor(Succ); + } +} + +void +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) { + if (this == FromMBB) + return; + + while (!FromMBB->succ_empty()) { + MachineBasicBlock *Succ = *FromMBB->succ_begin(); + if (!FromMBB->Probs.empty()) { + auto Prob = *FromMBB->Probs.begin(); + addSuccessor(Succ, Prob); + } else + addSuccessorWithoutProb(Succ); + FromMBB->removeSuccessor(Succ); + + // Fix up any PHI nodes in the successor. 
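+    // PHI operands come in (value, predecessor-block) pairs after the def, so
+    // the loop below visits operands 2, 4, 6, ... and retargets any pair
+    // whose incoming block is FromMBB to this block.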
+ for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(), + ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI) + for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI->getOperand(i); + if (MO.getMBB() == FromMBB) + MO.setMBB(this); + } + } + normalizeSuccProbs(); +} + +bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const { + return std::find(pred_begin(), pred_end(), MBB) != pred_end(); +} + +bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { + return std::find(succ_begin(), succ_end(), MBB) != succ_end(); +} + +bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { + MachineFunction::const_iterator I(this); + return std::next(I) == MachineFunction::const_iterator(MBB); +} + +bool MachineBasicBlock::canFallThrough() { + MachineFunction::iterator Fallthrough = getIterator(); + ++Fallthrough; + // If FallthroughBlock is off the end of the function, it can't fall through. + if (Fallthrough == getParent()->end()) + return false; + + // If FallthroughBlock isn't a successor, no fallthrough is possible. + if (!isSuccessor(&*Fallthrough)) + return false; + + // Analyze the branches, if any, at the end of the block. + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo(); + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) { + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicated check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. + return empty() || !back().isBarrier() || TII->isPredicated(&back()); + } + + // If there is no branch, control always falls through. + if (!TBB) return true; + + // If there is some explicit branch to the fallthrough block, it can obviously + // reach, even though the branch should get folded to fall through implicitly. + if (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough) + return true; + + // If it's an unconditional branch to some block not the fall through, it + // doesn't fall through. + if (Cond.empty()) return false; + + // Otherwise, if it is conditional and has no explicit false block, it falls + // through. + return FBB == nullptr; +} + +MachineBasicBlock * +MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + // Splitting the critical edge to a landing pad block is non-trivial. Don't do + // it in this generic function. + if (Succ->isEHPad()) + return nullptr; + + MachineFunction *MF = getParent(); + DebugLoc DL; // FIXME: this is nowhere + + // Performance might be harmed on HW that implements branching using exec mask + // where both sides of the branches are always executed. + if (MF->getTarget().requiresStructuredCFG()) + return nullptr; + + // We may need to update this's terminator, but we can't do that if + // AnalyzeBranch fails. If this uses a jump table, we won't touch it. 
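+  // Note: AnalyzeBranch fills TBB/FBB/Cond and returns false on success; a
+  // 'true' result means the terminator sequence could not be analyzed, and
+  // the edge is left unsplit below.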
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) + return nullptr; + + // Avoid bugpoint weirdness: A block may end with a conditional branch but + // jumps to the same MBB is either case. We have duplicate CFG edges in that + // case that we can't handle. Since this never happens in properly optimized + // code, just skip those edges. + if (TBB && TBB == FBB) { + DEBUG(dbgs() << "Won't split critical edge after degenerate BB#" + << getNumber() << '\n'); + return nullptr; + } + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(std::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "Splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + LiveIntervals *LIS = P->getAnalysisIfAvailable<LiveIntervals>(); + SlotIndexes *Indexes = P->getAnalysisIfAvailable<SlotIndexes>(); + if (LIS) + LIS->insertMBBInMaps(NMBB); + else if (Indexes) + Indexes->insertMBBInMaps(NMBB); + + // On some targets like Mips, branches may kill virtual registers. Make sure + // that LiveVariables is properly updated after updateTerminator replaces the + // terminators. + LiveVariables *LV = P->getAnalysisIfAvailable<LiveVariables>(); + + // Collect a list of virtual registers killed by the terminators. + SmallVector<unsigned, 4> KilledRegs; + if (LV) + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = &*I; + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0 || + !OI->isUse() || !OI->isKill() || OI->isUndef()) + continue; + unsigned Reg = OI->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg) || + LV->getVarInfo(Reg).removeKill(MI)) { + KilledRegs.push_back(Reg); + DEBUG(dbgs() << "Removing terminator kill: " << *MI); + OI->setIsKill(false); + } + } + } + + SmallVector<unsigned, 4> UsedRegs; + if (LIS) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = &*I; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0) + continue; + + unsigned Reg = OI->getReg(); + if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + UsedRegs.push_back(Reg); + } + } + } + + ReplaceUsesOfBlockWith(Succ, NMBB); + + // If updateTerminator() removes instructions, we need to remove them from + // SlotIndexes. + SmallVector<MachineInstr*, 4> Terminators; + if (Indexes) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + Terminators.push_back(&*I); + } + + updateTerminator(); + + if (Indexes) { + SmallVector<MachineInstr*, 4> NewTerminators; + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + NewTerminators.push_back(&*I); + + for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(), + E = Terminators.end(); I != E; ++I) { + if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == + NewTerminators.end()) + Indexes->removeMachineInstrFromMaps(*I); + } + } + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. 
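The surrounding routine splits an edge by inserting a brand-new block on exactly that edge, leaving every other edge of the endpoints untouched. A simplified sketch of just that CFG surgery on a toy adjacency-list graph; Node, splitEdge, and the block ids are invented for illustration, and none of the SlotIndexes, LiveVariables, or LiveIntervals updates are modeled:

#include <algorithm>
#include <cstdio>
#include <deque>
#include <vector>

struct Node {
  int Id;
  std::vector<Node *> Succs;
};

// Insert a fresh node on the Pred -> Succ edge only.
static Node *splitEdge(Node &Pred, Node &Succ, std::deque<Node> &CFG, int NewId) {
  auto It = std::find(Pred.Succs.begin(), Pred.Succs.end(), &Succ);
  if (It == Pred.Succs.end())
    return nullptr;                    // no such edge to split
  CFG.push_back(Node{NewId, {&Succ}}); // the new block jumps straight to Succ
  *It = &CFG.back();                   // Pred now branches to the new block
  return &CFG.back();
}

int main() {
  std::deque<Node> CFG;                // deque: stable addresses on push_back
  CFG.push_back(Node{0, {}});
  CFG.push_back(Node{1, {}});
  CFG.push_back(Node{2, {}});
  CFG[0].Succs = {&CFG[1], &CFG[2]};   // 0 -> 1 and 0 -> 2
  Node *N = splitEdge(CFG[0], CFG[2], CFG, /*NewId=*/3);
  std::printf("0 -> %d -> %d\n", CFG[0].Succs[1]->Id, N->Succs[0]->Id);
  // prints: 0 -> 3 -> 2
}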
+ NMBB->addSuccessor(Succ); + if (!NMBB->isLayoutSuccessor(Succ)) { + Cond.clear(); + TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL); + + if (Indexes) { + for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); + I != E; ++I) { + // Some instructions may have been moved to NMBB by updateTerminator(), + // so we first remove any instruction that already has an index. + if (Indexes->hasIndex(&*I)) + Indexes->removeMachineInstrFromMaps(&*I); + Indexes->insertMachineInstrInMaps(&*I); + } + } + } + + // Fix PHI nodes in Succ so they refer to NMBB instead of this + for (MachineBasicBlock::instr_iterator + i = Succ->instr_begin(),e = Succ->instr_end(); + i != e && i->isPHI(); ++i) + for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) + if (i->getOperand(ni+1).getMBB() == this) + i->getOperand(ni+1).setMBB(NMBB); + + // Inherit live-ins from the successor + for (const auto &LI : Succ->liveins()) + NMBB->addLiveIn(LI); + + // Update LiveVariables. + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + if (LV) { + // Restore kills of virtual registers that were killed by the terminators. + while (!KilledRegs.empty()) { + unsigned Reg = KilledRegs.pop_back_val(); + for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) { + if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false)) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + LV->getVarInfo(Reg).Kills.push_back(&*I); + DEBUG(dbgs() << "Restored terminator kill: " << *I); + break; + } + } + // Update relevant live-through information. + LV->addNewBlock(NMBB, this, Succ); + } + + if (LIS) { + // After splitting the edge and updating SlotIndexes, live intervals may be + // in one of two situations, depending on whether this block was the last in + // the function. If the original block was the last in the function, all + // live intervals will end prior to the beginning of the new split block. If + // the original block was not at the end of the function, all live intervals + // will extend to the end of the new split block. + + bool isLastMBB = + std::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + + SlotIndex StartIndex = Indexes->getMBBEndIdx(this); + SlotIndex PrevIndex = StartIndex.getPrevSlot(); + SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB); + + // Find the registers used from NMBB in PHIs in Succ. 
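The PHI fix-up just above relies on PHI operands coming in (value, predecessor-block) pairs, so rerouting an edge through the new block only rewrites the block half of the matching pairs. A tiny sketch under that assumption, with integers standing in for virtual registers and block numbers (names are illustrative only):

#include <cstdio>
#include <utility>
#include <vector>

struct Phi {
  std::vector<std::pair<int, int>> Incoming; // (virtual reg, pred block id)
};

static void retargetPhi(Phi &P, int OldPred, int NewPred) {
  for (auto &In : P.Incoming)
    if (In.second == OldPred)
      In.second = NewPred; // value operand is untouched
}

int main() {
  Phi P{{{100, 1}, {101, 2}}};
  retargetPhi(P, /*OldPred=*/1, /*NewPred=*/3); // edge now flows through block 3
  std::printf("%d comes from bb%d\n", P.Incoming[0].first, P.Incoming[0].second);
  // prints: 100 comes from bb3
}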
+ SmallSet<unsigned, 8> PHISrcRegs; + for (MachineBasicBlock::instr_iterator + I = Succ->instr_begin(), E = Succ->instr_end(); + I != E && I->isPHI(); ++I) { + for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { + if (I->getOperand(ni+1).getMBB() == NMBB) { + MachineOperand &MO = I->getOperand(ni); + unsigned Reg = MO.getReg(); + PHISrcRegs.insert(Reg); + if (MO.isUndef()) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && + "PHI sources should be live out of their predecessors."); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); + } + } + } + + MachineRegisterInfo *MRI = &getParent()->getRegInfo(); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + if (!LI.liveAt(PrevIndex)) + continue; + + bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ)); + if (isLiveOut && isLastMBB) { + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "LiveInterval should have VNInfo where it is live."); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); + } else if (!isLiveOut && !isLastMBB) { + LI.removeSegment(StartIndex, EndIndex); + } + } + + // Update all intervals for registers whose uses may have been modified by + // updateTerminator(). + LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); + } + + if (MachineDominatorTree *MDT = + P->getAnalysisIfAvailable<MachineDominatorTree>()) + MDT->recordSplitCriticalEdge(this, Succ, NMBB); + + if (MachineLoopInfo *MLI = P->getAnalysisIfAvailable<MachineLoopInfo>()) + if (MachineLoop *TIL = MLI->getLoopFor(this)) { + // If one or the other blocks were not in a loop, the new block is not + // either, and thus LI doesn't need to be updated. + if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) { + if (TIL == DestLoop) { + // Both in the same loop, the NMBB joins loop. + DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else if (TIL->contains(DestLoop)) { + // Edge from an outer loop to an inner loop. Add to the outer loop. + TIL->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else if (DestLoop->contains(TIL)) { + // Edge from an inner loop to an outer loop. Add to the outer loop. + DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase()); + } else { + // Edge from two loops with no containment relation. Because these + // are natural loops, we know that the destination block must be the + // header of its loop (adding a branch into a loop elsewhere would + // create an irreducible loop). + assert(DestLoop->getHeader() == Succ && + "Should not create irreducible loops!"); + if (MachineLoop *P = DestLoop->getParentLoop()) + P->addBasicBlockToLoop(NMBB, MLI->getBase()); + } + } + } + + return NMBB; +} + +/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's +/// neighboring instructions so the bundle won't be broken by removing MI. +static void unbundleSingleMI(MachineInstr *MI) { + // Removing the first instruction in a bundle. + if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) + MI->unbundleFromSucc(); + // Removing the last instruction in a bundle. + if (MI->isBundledWithPred() && !MI->isBundledWithSucc()) + MI->unbundleFromPred(); + // If MI is not bundled, or if it is internal to a bundle, the neighbor flags + // are already fine. 
+} + +MachineBasicBlock::instr_iterator +MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) { + unbundleSingleMI(&*I); + return Insts.erase(I); +} + +MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) { + unbundleSingleMI(MI); + MI->clearFlag(MachineInstr::BundledPred); + MI->clearFlag(MachineInstr::BundledSucc); + return Insts.remove(MI); +} + +MachineBasicBlock::instr_iterator +MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) { + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + // Set the bundle flags when inserting inside a bundle. + if (I != instr_end() && I->isBundledWithPred()) { + MI->setFlag(MachineInstr::BundledPred); + MI->setFlag(MachineInstr::BundledSucc); + } + return Insts.insert(I, MI); +} + +/// This method unlinks 'this' from the containing function, and returns it, but +/// does not delete it. +MachineBasicBlock *MachineBasicBlock::removeFromParent() { + assert(getParent() && "Not embedded in a function!"); + getParent()->remove(this); + return this; +} + +/// This method unlinks 'this' from the containing function, and deletes it. +void MachineBasicBlock::eraseFromParent() { + assert(getParent() && "Not embedded in a function!"); + getParent()->erase(this); +} + +/// Given a machine basic block that branched to 'Old', change the code and CFG +/// so that it branches to 'New' instead. +void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Cannot replace self with self!"); + + MachineBasicBlock::instr_iterator I = instr_end(); + while (I != instr_begin()) { + --I; + if (!I->isTerminator()) break; + + // Scan the operands of this machine instruction, replacing any uses of Old + // with New. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) + if (I->getOperand(i).isMBB() && + I->getOperand(i).getMBB() == Old) + I->getOperand(i).setMBB(New); + } + + // Update the successor information. + replaceSuccessor(Old, New); +} + +/// Various pieces of code can cause excess edges in the CFG to be inserted. If +/// we have proven that MBB can only branch to DestA and DestB, remove any other +/// MBB successors from the CFG. DestA and DestB can be null. +/// +/// Besides DestA and DestB, retain other edges leading to LandingPads +/// (currently there can be only one; we don't check or require that here). +/// Note it is possible that DestA and/or DestB are LandingPads. +bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA, + MachineBasicBlock *DestB, + bool IsCond) { + // The values of DestA and DestB frequently come from a call to the + // 'TargetInstrInfo::AnalyzeBranch' method. We take our meaning of the initial + // values from there. + // + // 1. If both DestA and DestB are null, then the block ends with no branches + // (it falls through to its successor). + // 2. If DestA is set, DestB is null, and IsCond is false, then the block ends + // with only an unconditional branch. + // 3. If DestA is set, DestB is null, and IsCond is true, then the block ends + // with a conditional branch that falls through to a successor (DestB). + // 4. If DestA and DestB is set and IsCond is true, then the block ends with a + // conditional branch followed by an unconditional branch. DestA is the + // 'true' destination and DestB is the 'false' destination. 
+ + bool Changed = false; + + MachineFunction::iterator FallThru = std::next(getIterator()); + + if (!DestA && !DestB) { + // Block falls through to successor. + DestA = &*FallThru; + DestB = &*FallThru; + } else if (DestA && !DestB) { + if (IsCond) + // Block ends in conditional jump that falls through to successor. + DestB = &*FallThru; + } else { + assert(DestA && DestB && IsCond && + "CFG in a bad state. Cannot correct CFG edges"); + } + + // Remove superfluous edges. I.e., those which aren't destinations of this + // basic block, duplicate edges, or landing pads. + SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs; + MachineBasicBlock::succ_iterator SI = succ_begin(); + while (SI != succ_end()) { + const MachineBasicBlock *MBB = *SI; + if (!SeenMBBs.insert(MBB).second || + (MBB != DestA && MBB != DestB && !MBB->isEHPad())) { + // This is a superfluous edge, remove it. + SI = removeSuccessor(SI); + Changed = true; + } else { + ++SI; + } + } + + if (Changed) + normalizeSuccProbs(); + return Changed; +} + +/// Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE +/// instructions. Return UnknownLoc if there is none. +DebugLoc +MachineBasicBlock::findDebugLoc(instr_iterator MBBI) { + DebugLoc DL; + instr_iterator E = instr_end(); + if (MBBI == E) + return DL; + + // Skip debug declarations, we don't want a DebugLoc from them. + while (MBBI != E && MBBI->isDebugValue()) + MBBI++; + if (MBBI != E) + DL = MBBI->getDebugLoc(); + return DL; +} + +/// Return probability of the edge from this block to MBB. +BranchProbability +MachineBasicBlock::getSuccProbability(const_succ_iterator Succ) const { + if (Probs.empty()) + return BranchProbability(1, succ_size()); + + const auto &Prob = *getProbabilityIterator(Succ); + if (Prob.isUnknown()) { + // For unknown probabilities, collect the sum of all known ones, and evenly + // ditribute the complemental of the sum to each unknown probability. + unsigned KnownProbNum = 0; + auto Sum = BranchProbability::getZero(); + for (auto &P : Probs) { + if (!P.isUnknown()) { + Sum += P; + KnownProbNum++; + } + } + return Sum.getCompl() / (Probs.size() - KnownProbNum); + } else + return Prob; +} + +/// Set successor probability of a given iterator. +void MachineBasicBlock::setSuccProbability(succ_iterator I, + BranchProbability Prob) { + assert(!Prob.isUnknown()); + if (Probs.empty()) + return; + *getProbabilityIterator(I) = Prob; +} + +/// Return probability iterator corresonding to the I successor iterator +MachineBasicBlock::const_probability_iterator +MachineBasicBlock::getProbabilityIterator( + MachineBasicBlock::const_succ_iterator I) const { + assert(Probs.size() == Successors.size() && "Async probability list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; +} + +/// Return probability iterator corresonding to the I successor iterator. +MachineBasicBlock::probability_iterator +MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) { + assert(Probs.size() == Successors.size() && "Async probability list!"); + const size_t index = std::distance(Successors.begin(), I); + assert(index < Probs.size() && "Not a current successor!"); + return Probs.begin() + index; +} + +/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed +/// as of just before "MI". 
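getSuccProbability above fills in unknown edge probabilities by summing the known ones and splitting the complement evenly among the unknown entries. A back-of-the-envelope version of that arithmetic, using integer percentages instead of BranchProbability (a sketch, not the class's real representation):

#include <cstdio>
#include <vector>

int main() {
  // -1 marks an unknown probability; known entries are percentages.
  std::vector<int> Probs = {60, -1, 10, -1};

  int KnownSum = 0, UnknownCount = 0;
  for (int P : Probs) {
    if (P < 0)
      ++UnknownCount;
    else
      KnownSum += P;
  }
  // The complement of the known sum is split evenly across unknown entries.
  int EachUnknown = UnknownCount ? (100 - KnownSum) / UnknownCount : 0;
  std::printf("each unknown edge gets %d%%\n", EachUnknown); // prints: 15%
}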
+/// +/// Search is localised to a neighborhood of +/// Neighborhood instructions before (searching for defs or kills) and N +/// instructions after (searching just for defs) MI. +MachineBasicBlock::LivenessQueryResult +MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, + unsigned Reg, const_iterator Before, + unsigned Neighborhood) const { + unsigned N = Neighborhood; + + // Start by searching backwards from Before, looking for kills, reads or defs. + const_iterator I(Before); + // If this is the first insn in the block, don't search backwards. + if (I != begin()) { + do { + --I; + + MachineOperandIteratorBase::PhysRegInfo Info = + ConstMIOperands(I).analyzePhysReg(Reg, TRI); + + // Defs happen after uses so they take precedence if both are present. + + // Register is dead after a dead def of the full register. + if (Info.DeadDef) + return LQR_Dead; + // Register is (at least partially) live after a def. + if (Info.Defined) + return LQR_Live; + // Register is dead after a full kill or clobber and no def. + if (Info.Killed || Info.Clobbered) + return LQR_Dead; + // Register must be live if we read it. + if (Info.Read) + return LQR_Live; + } while (I != begin() && --N > 0); + } + + // Did we get to the start of the block? + if (I == begin()) { + // If so, the register's state is definitely defined by the live-in state. + for (MCRegAliasIterator RAI(Reg, TRI, /*IncludeSelf=*/true); RAI.isValid(); + ++RAI) + if (isLiveIn(*RAI)) + return LQR_Live; + + return LQR_Dead; + } + + N = Neighborhood; + + // Try searching forwards from Before, looking for reads or defs. + I = const_iterator(Before); + // If this is the last insn in the block, don't search forwards. + if (I != end()) { + for (++I; I != end() && N > 0; ++I, --N) { + MachineOperandIteratorBase::PhysRegInfo Info = + ConstMIOperands(I).analyzePhysReg(Reg, TRI); + + // Register is live when we read it here. + if (Info.Read) + return LQR_Live; + // Register is dead if we can fully overwrite or clobber it here. + if (Info.FullyDefined || Info.Clobbered) + return LQR_Dead; + } + } + + // At this point we have no idea of the liveness of the register. + return LQR_Unknown; +} + +const uint32_t * +MachineBasicBlock::getBeginClobberMask(const TargetRegisterInfo *TRI) const { + // EH funclet entry does not preserve any registers. + return isEHFuncletEntry() ? TRI->getNoPreservedMask() : nullptr; +} + +const uint32_t * +MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const { + // If we see a return block with successors, this must be a funclet return, + // which does not preserve any registers. If there are no successors, we don't + // care what kind of return it is, putting a mask after it is a no-op. + return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp new file mode 100644 index 0000000..9119e31 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -0,0 +1,193 @@ +//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Loops should be simplified before this analysis. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyInfoImpl.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" + +using namespace llvm; + +#define DEBUG_TYPE "block-freq" + +#ifndef NDEBUG +enum GVDAGType { + GVDT_None, + GVDT_Fraction, + GVDT_Integer +}; + +static cl::opt<GVDAGType> +ViewMachineBlockFreqPropagationDAG("view-machine-block-freq-propagation-dags", + cl::Hidden, + cl::desc("Pop up a window to show a dag displaying how machine block " + "frequencies propagate through the CFG."), + cl::values( + clEnumValN(GVDT_None, "none", + "do not display graphs."), + clEnumValN(GVDT_Fraction, "fraction", "display a graph using the " + "fractional block frequency representation."), + clEnumValN(GVDT_Integer, "integer", "display a graph using the raw " + "integer fractional block frequency representation."), + clEnumValEnd)); + +namespace llvm { + +template <> +struct GraphTraits<MachineBlockFrequencyInfo *> { + typedef const MachineBasicBlock NodeType; + typedef MachineBasicBlock::const_succ_iterator ChildIteratorType; + typedef MachineFunction::const_iterator nodes_iterator; + + static inline + const NodeType *getEntryNode(const MachineBlockFrequencyInfo *G) { + return &G->getFunction()->front(); + } + + static ChildIteratorType child_begin(const NodeType *N) { + return N->succ_begin(); + } + + static ChildIteratorType child_end(const NodeType *N) { + return N->succ_end(); + } + + static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->begin(); + } + + static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->end(); + } +}; + +template<> +struct DOTGraphTraits<MachineBlockFrequencyInfo*> : + public DefaultDOTGraphTraits { + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineBlockFrequencyInfo *G) { + return G->getFunction()->getName(); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineBlockFrequencyInfo *Graph) { + std::string Result; + raw_string_ostream OS(Result); + + OS << Node->getName().str() << ":"; + switch (ViewMachineBlockFreqPropagationDAG) { + case GVDT_Fraction: + Graph->printBlockFreq(OS, Node); + break; + case GVDT_Integer: + OS << Graph->getBlockFreq(Node).getFrequency(); + break; + case GVDT_None: + llvm_unreachable("If we are not supposed to render a graph we should " + "never reach this point."); + } + + return Result; + } +}; + + +} // end namespace llvm +#endif + +INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, "machine-block-freq", + "Machine Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockFrequencyInfo, "machine-block-freq", + "Machine Block Frequency Analysis", true, true) + +char MachineBlockFrequencyInfo::ID = 0; + + +MachineBlockFrequencyInfo:: +MachineBlockFrequencyInfo() :MachineFunctionPass(ID) { + initializeMachineBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); +} + +MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() {} + +void 
MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { + MachineBranchProbabilityInfo &MBPI = + getAnalysis<MachineBranchProbabilityInfo>(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + if (!MBFI) + MBFI.reset(new ImplType); + MBFI->calculate(F, MBPI, MLI); +#ifndef NDEBUG + if (ViewMachineBlockFreqPropagationDAG != GVDT_None) { + view(); + } +#endif + return false; +} + +void MachineBlockFrequencyInfo::releaseMemory() { MBFI.reset(); } + +/// Pop up a ghostview window with the current block frequency propagation +/// rendered using dot. +void MachineBlockFrequencyInfo::view() const { +// This code is only for debugging. +#ifndef NDEBUG + ViewGraph(const_cast<MachineBlockFrequencyInfo *>(this), + "MachineBlockFrequencyDAGs"); +#else + errs() << "MachineBlockFrequencyInfo::view is only available in debug builds " + "on systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +BlockFrequency MachineBlockFrequencyInfo:: +getBlockFreq(const MachineBasicBlock *MBB) const { + return MBFI ? MBFI->getBlockFreq(MBB) : 0; +} + +const MachineFunction *MachineBlockFrequencyInfo::getFunction() const { + return MBFI ? MBFI->getFunction() : nullptr; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const BlockFrequency Freq) const { + return MBFI ? MBFI->printBlockFreq(OS, Freq) : OS; +} + +raw_ostream & +MachineBlockFrequencyInfo::printBlockFreq(raw_ostream &OS, + const MachineBasicBlock *MBB) const { + return MBFI ? MBFI->printBlockFreq(OS, MBB) : OS; +} + +uint64_t MachineBlockFrequencyInfo::getEntryFreq() const { + return MBFI ? MBFI->getEntryFreq() : 0; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp new file mode 100644 index 0000000..f5e3056 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -0,0 +1,1481 @@ +//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations using the CFG +// structure and branch probability estimates. +// +// The pass strives to preserve the structure of the CFG (that is, retain +// a topological ordering of basic blocks) in the absence of a *strong* signal +// to the contrary from probabilities. However, within the CFG structure, it +// attempts to choose an ordering which favors placing more likely sequences of +// blocks adjacent to each other. +// +// The algorithm works from the inner-most loop within a function outward, and +// at each stage walks through the basic blocks, trying to coalesce them into +// sequential chains where allowed by the CFG (or demanded by heavy +// probabilities). Finally, it walks the blocks in topological order, and the +// first time it reaches a chain of basic blocks, it schedules them in the +// function in-order. 
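The layout strategy described in this header, walking the blocks in their original order and, on first contact with a chain, emitting the whole chain contiguously, can be shown on a toy example. The block numbers and chain assignment below are invented purely for demonstration:

#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Original block order and a hypothetical block -> chain assignment.
  std::vector<int> Blocks = {0, 1, 2, 3, 4};
  std::map<int, std::vector<int>> Chains = {{0, {0, 2}}, {1, {1, 4, 3}}};
  std::map<int, int> BlockToChain = {{0, 0}, {2, 0}, {1, 1}, {4, 1}, {3, 1}};

  std::vector<int> Layout;
  std::map<int, bool> Emitted;
  for (int B : Blocks) {
    int C = BlockToChain[B];
    if (Emitted[C])
      continue;                   // chain already placed when first reached
    Emitted[C] = true;
    for (int Member : Chains[C])
      Layout.push_back(Member);   // place the whole chain contiguously
  }
  for (int B : Layout)
    std::printf("%d ", B);        // prints: 0 2 1 4 3
  std::printf("\n");
}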
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "block-placement" + +STATISTIC(NumCondBranches, "Number of conditional branches"); +STATISTIC(NumUncondBranches, "Number of unconditional branches"); +STATISTIC(CondBranchTakenFreq, + "Potential frequency of taking conditional branches"); +STATISTIC(UncondBranchTakenFreq, + "Potential frequency of taking unconditional branches"); + +static cl::opt<unsigned> AlignAllBlock("align-all-blocks", + cl::desc("Force the alignment of all " + "blocks in the function."), + cl::init(0), cl::Hidden); + +static cl::opt<unsigned> + AlignAllLoops("align-all-loops", + cl::desc("Force the alignment of all loops in the function."), + cl::init(0), cl::Hidden); + +// FIXME: Find a good default for this flag and remove the flag. +static cl::opt<unsigned> ExitBlockBias( + "block-placement-exit-block-bias", + cl::desc("Block frequency percentage a loop exit block needs " + "over the original exit to be considered the new exit."), + cl::init(0), cl::Hidden); + +static cl::opt<bool> OutlineOptionalBranches( + "outline-optional-branches", + cl::desc("Put completely optional branches, i.e. branches with a common " + "post dominator, out of line."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> OutlineOptionalThreshold( + "outline-optional-threshold", + cl::desc("Don't outline optional branches that are a single block with an " + "instruction count below this threshold"), + cl::init(4), cl::Hidden); + +static cl::opt<unsigned> LoopToColdBlockRatio( + "loop-to-cold-block-ratio", + cl::desc("Outline loop blocks from loop chain if (frequency of loop) / " + "(frequency of block) is greater than this ratio"), + cl::init(5), cl::Hidden); + +static cl::opt<bool> + PreciseRotationCost("precise-rotation-cost", + cl::desc("Model the cost of loop rotation more " + "precisely by using profile data."), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> MisfetchCost( + "misfetch-cost", + cl::desc("Cost that models the probablistic risk of an instruction " + "misfetch due to a jump comparing to falling through, whose cost " + "is zero."), + cl::init(1), cl::Hidden); + +static cl::opt<unsigned> JumpInstCost("jump-inst-cost", + cl::desc("Cost of jump instructions."), + cl::init(1), cl::Hidden); + +namespace { +class BlockChain; +/// \brief Type for our function-wide basic block -> block chain mapping. +typedef DenseMap<MachineBasicBlock *, BlockChain *> BlockToChainMapType; +} + +namespace { +/// \brief A chain of blocks which will be laid out contiguously. 
+/// +/// This is the datastructure representing a chain of consecutive blocks that +/// are profitable to layout together in order to maximize fallthrough +/// probabilities and code locality. We also can use a block chain to represent +/// a sequence of basic blocks which have some external (correctness) +/// requirement for sequential layout. +/// +/// Chains can be built around a single basic block and can be merged to grow +/// them. They participate in a block-to-chain mapping, which is updated +/// automatically as chains are merged together. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. + /// + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector<MachineBasicBlock *, 4> Blocks; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + +public: + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl<MachineBasicBlock *>::iterator iterator; + + /// \brief Beginning of blocks within the chain. + iterator begin() { return Blocks.begin(); } + + /// \brief End of blocks within the chain. + iterator end() { return Blocks.end(); } + + /// \brief Merge a block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; + } + + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); + + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (MachineBasicBlock *ChainBB : *Chain) { + Blocks.push_back(ChainBB); + assert(BlockToChain[ChainBB] == Chain && "Incoming blocks not in chain"); + BlockToChain[ChainBB] = this; + } + } + +#ifndef NDEBUG + /// \brief Dump the blocks in this chain. + LLVM_DUMP_METHOD void dump() { + for (MachineBasicBlock *MBB : *this) + MBB->dump(); + } +#endif // NDEBUG + + /// \brief Count of predecessors within the loop currently being processed. + /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. 
+ typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet; + + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + + /// \brief A handle to the target's instruction info. + const TargetInstrInfo *TII; + + /// \brief A handle to the target's lowering info. + const TargetLoweringBase *TLI; + + /// \brief A handle to the post dominator tree. + MachineDominatorTree *MDT; + + /// \brief A set of blocks that are unavoidably execute, i.e. they dominate + /// all terminators of the MachineFunction. + SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily while processing the loop structure of + /// a function. To reduce malloc traffic, we allocate them using this + /// slab-like allocator, and destroy them after the pass completes. An + /// important guarantee is that this allocator produces stable pointers to + /// the chains. + SpecificBumpPtrAllocator<BlockChain> ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. + DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain; + + void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = nullptr); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + selectBestCandidateBlock(BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter = nullptr); + MachineBasicBlock *findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + MachineBasicBlock *findBestLoopExit(MachineFunction &F, MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + BlockFilterSet collectLoopBlockSet(MachineFunction &F, MachineLoop &L); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void rotateLoop(BlockChain &LoopChain, MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet); + void rotateLoopWithProfile(BlockChain &LoopChain, MachineLoop &L, + const BlockFilterSet &LoopBlockSet); + void buildCFGChains(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + 
MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacement::ID = 0; +char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement", + "Branch Probability Basic Block Placement", false, false) + +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} + +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; +} +#endif + +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. + for (MachineBasicBlock *MBB : Chain) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock *Succ : MBB->successors()) { + if (BlockFilter && !BlockFilter->count(Succ)) + continue; + BlockChain &SuccChain = *BlockToChain[Succ]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || Succ == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SuccChain.begin()); + } + } +} + +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. +/// +/// \returns The best successor block found, or null if none are viable. 
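markChainSuccessors above is essentially an in-degree countdown in the spirit of Kahn's topological sort: placing a chain decrements the unplaced-predecessor count of each successor chain, and a chain joins the worklist when that count reaches zero. A standalone sketch with made-up chain names and counts:

#include <cstdio>
#include <map>
#include <vector>

int main() {
  // Hypothetical chains and their count of unplaced in-loop predecessors.
  std::map<char, int> LoopPredecessors = {{'A', 0}, {'B', 2}, {'C', 1}};
  std::map<char, std::vector<char>> SuccChains = {
      {'A', {'B', 'C'}}, {'C', {'B'}}, {'B', {}}};

  std::vector<char> WorkList = {'A'}, Order;
  while (!WorkList.empty()) {
    char Chain = WorkList.back();
    WorkList.pop_back();
    Order.push_back(Chain);               // "place" this chain
    for (char Succ : SuccChains[Chain])
      if (--LoopPredecessors[Succ] == 0)  // last predecessor just placed
        WorkList.push_back(Succ);
  }
  for (char C : Order)
    std::printf("%c ", C);                // prints: A C B
  std::printf("\n");
}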
+MachineBasicBlock * +MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = nullptr; + auto BestProb = BranchProbability::getZero(); + + // Adjust edge probabilities by excluding edges pointing to blocks that is + // either not in BlockFilter or is already in the current chain. Consider the + // following CFG: + // + // --->A + // | / \ + // | B C + // | \ / \ + // ----D E + // + // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after + // A->C is chosen as a fall-through, D won't be selected as a successor of C + // due to CFG constraint (the probability of C->D is not greater than + // HotProb). If we exclude E that is not in BlockFilter when calculating the + // probability of C->D, D will be selected and we will get A C D B as the + // layout of this loop. + auto AdjustedSumProb = BranchProbability::getOne(); + SmallVector<MachineBasicBlock *, 4> Successors; + for (MachineBasicBlock *Succ : BB->successors()) { + bool SkipSucc = false; + if (BlockFilter && !BlockFilter->count(Succ)) { + SkipSucc = true; + } else { + BlockChain *SuccChain = BlockToChain[Succ]; + if (SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(Succ) + << " -> Already merged!\n"); + SkipSucc = true; + } else if (Succ != *SuccChain->begin()) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> Mid chain!\n"); + continue; + } + } + if (SkipSucc) + AdjustedSumProb -= MBPI->getEdgeProbability(BB, Succ); + else + Successors.push_back(Succ); + } + + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock *Succ : Successors) { + BranchProbability SuccProb; + uint32_t SuccProbN = MBPI->getEdgeProbability(BB, Succ).getNumerator(); + uint32_t SuccProbD = AdjustedSumProb.getNumerator(); + if (SuccProbN >= SuccProbD) + SuccProb = BranchProbability::getOne(); + else + SuccProb = BranchProbability(SuccProbN, SuccProbD); + + // If we outline optional branches, look whether Succ is unavoidable, i.e. + // dominates all terminators of the MachineFunction. If it does, other + // successors must be optional. Don't do this for cold branches. + if (OutlineOptionalBranches && SuccProb > HotProb.getCompl() && + UnavoidableBlocks.count(Succ) > 0) { + auto HasShortOptionalBranch = [&]() { + for (MachineBasicBlock *Pred : Succ->predecessors()) { + // Check whether there is an unplaced optional branch. + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + // Check whether the optional branch has exactly one BB. + if (Pred->pred_size() > 1 || *Pred->pred_begin() != BB) + continue; + // Check whether the optional branch is small. + if (Pred->size() < OutlineOptionalThreshold) + return true; + } + return false; + }; + if (!HasShortOptionalBranch()) + return Succ; + } + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + BlockChain &SuccChain = *BlockToChain[Succ]; + if (SuccChain.LoopPredecessors != 0) { + if (SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + << " (prob) (CFG conflict)\n"); + continue; + } + + // Make sure that a hot successor doesn't have a globally more + // important predecessor. 
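The probability adjustment above rescales each remaining edge against the total left after excluding filtered or already-merged successors, which is what lets a 50% edge clear the 80% hot threshold in the A/B/C/D/E example from the comment. A quick numeric sketch of that rescaling, with plain doubles in place of BranchProbability:

#include <cstdio>

int main() {
  double HotProb = 0.8;
  double PCD = 0.5, PCE = 0.5;    // raw probabilities of C->D and C->E
  double AdjustedSum = 1.0 - PCE; // E is excluded (outside the block filter)
  double Rescaled = PCD / AdjustedSum;
  std::printf("C->D: raw %.2f, rescaled %.2f, hot? %d\n", PCD, Rescaled,
              Rescaled >= HotProb); // prints: C->D: raw 0.50, rescaled 1.00, hot? 1
}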
+ auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ); + BlockFrequency CandidateEdgeFreq = + MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl(); + bool BadCFGConflict = false; + for (MachineBasicBlock *Pred : Succ->predecessors()) { + if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) || + BlockToChain[Pred] == &Chain) + continue; + BlockFrequency PredEdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ); + if (PredEdgeFreq >= CandidateEdgeFreq) { + BadCFGConflict = true; + break; + } + } + if (BadCFGConflict) { + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + << " (prob) (non-cold CFG conflict)\n"); + continue; + } + } + + DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestProb >= SuccProb) + continue; + BestSucc = Succ; + BestProb = SuccProb; + } + return BestSucc; +} + +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList, + const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + [&](MachineBasicBlock *BB) { + return BlockToChain.lookup(BB) == &Chain; + }), + WorkList.end()); + + MachineBasicBlock *BestBlock = nullptr; + BlockFrequency BestFreq; + for (MachineBasicBlock *MBB : WorkList) { + BlockChain &SuccChain = *BlockToChain[MBB]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(MBB); + DEBUG(dbgs() << " " << getBlockName(MBB) << " -> "; + MBFI->printBlockFreq(dbgs(), CandidateFreq) << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = MBB; + BestFreq = CandidateFreq; + } + return BestBlock; +} + +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the function's blocks in order, starting from the +/// LastUnplacedBlockIt. We update this iterator on each call to avoid +/// re-scanning the entire sequence on repeated calls to this routine. 
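selectBestCandidateBlock above first purges already-placed entries with the erase/remove idiom and then keeps the most frequent survivor. A compact sketch of those two steps, with invented block numbers and frequencies standing in for the real chain and frequency types:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Cand { int Block; bool Placed; unsigned Freq; };

int main() {
  std::vector<Cand> WorkList = {{1, true, 50}, {2, false, 20}, {3, false, 70}};
  // Drop entries whose chain has already been placed.
  WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
                                [](const Cand &C) { return C.Placed; }),
                 WorkList.end());
  // Keep the highest-frequency survivor.
  const Cand *Best = nullptr;
  for (const Cand &C : WorkList)
    if (!Best || C.Freq > Best->Freq)
      Best = &C;
  std::printf("best candidate: block %d (freq %u)\n", Best->Block, Best->Freq);
  // prints: best candidate: block 3 (freq 70)
}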
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + MachineFunction &F, const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt, + const BlockFilterSet *BlockFilter) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F.end(); I != E; + ++I) { + if (BlockFilter && !BlockFilter->count(&*I)) + continue; + if (BlockToChain[&*I] != &PlacedChain) { + PrevUnplacedBlockIt = I; + // Now select the head of the chain to which the unplaced block belongs + // as the block to place. This will force the entire chain to be placed, + // and satisfies the requirements of merging chains. + return *BlockToChain[&*I]->begin(); + } + } + return nullptr; +} + +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl<MachineBasicBlock *> &BlockWorkList, + const BlockFilterSet *BlockFilter) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + MachineFunction &F = *BB->getParent(); + MachineFunction::iterator PrevUnplacedBlockIt = F.begin(); + + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *std::prev(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*std::prev(Chain.end()) == BB); + + // Look for the best viable successor if there is one to place immediately + // after this block. + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. This won't be a fallthrough, but it will increase locality. + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); + + if (!BestSucc) { + BestSucc = + getFirstUnplacedBlock(F, Chain, PrevUnplacedBlockIt, BlockFilter); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); + } + + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. + SuccChain.LoopPredecessors = 0; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *std::prev(Chain.end()); + } + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); +} + +/// \brief Find the best loop top block for layout. +/// +/// Look for a block which is strictly better than the loop header for laying +/// out at the top of the loop. This looks for one and only one pattern: +/// a latch block with no conditional exit. This block will cause a conditional +/// jump around it or will be the bottom of the loop if we lay it out in place, +/// but if it it doesn't end up at the bottom of the loop for any reason, +/// rotation alone won't fix it. Because such a block will always result in an +/// unconditional jump (for the backedge) rotating it in front of the loop +/// header is always profitable. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopTop(MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // Check that the header hasn't been fused with a preheader block due to + // crazy branches. 
If it has, we need to start with the header at the top to + // prevent pulling the preheader into the loop body. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return L.getHeader(); + + DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(L.getHeader()) + << "\n"); + + BlockFrequency BestPredFreq; + MachineBasicBlock *BestPred = nullptr; + for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) { + if (!LoopBlockSet.count(Pred)) + continue; + DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", " + << Pred->succ_size() << " successors, "; + MBFI->printBlockFreq(dbgs(), Pred) << " freq\n"); + if (Pred->succ_size() > 1) + continue; + + BlockFrequency PredFreq = MBFI->getBlockFreq(Pred); + if (!BestPred || PredFreq > BestPredFreq || + (!(PredFreq < BestPredFreq) && + Pred->isLayoutSuccessor(L.getHeader()))) { + BestPred = Pred; + BestPredFreq = PredFreq; + } + } + + // If no direct predecessor is fine, just use the loop header. + if (!BestPred) + return L.getHeader(); + + // Walk backwards through any straight line of predecessors. + while (BestPred->pred_size() == 1 && + (*BestPred->pred_begin())->succ_size() == 1 && + *BestPred->pred_begin() != L.getHeader()) + BestPred = *BestPred->pred_begin(); + + DEBUG(dbgs() << " final top: " << getBlockName(BestPred) << "\n"); + return BestPred; +} + +/// \brief Find the best loop exiting block for layout. +/// +/// This routine implements the logic to analyze the loop looking for the best +/// block to layout at the top of the loop. Typically this is done to maximize +/// fallthrough opportunities. +MachineBasicBlock * +MachineBlockPlacement::findBestLoopExit(MachineFunction &F, MachineLoop &L, + const BlockFilterSet &LoopBlockSet) { + // We don't want to layout the loop linearly in all cases. If the loop header + // is just a normal basic block in the loop, we want to look for what block + // within the loop is the best one to layout at the top. However, if the loop + // header has be pre-merged into a chain due to predecessors not having + // analyzable branches, *and* the predecessor it is merged with is *not* part + // of the loop, rotating the header into the middle of the loop will create + // a non-contiguous range of blocks which is Very Bad. So start with the + // header and only rotate if safe. + BlockChain &HeaderChain = *BlockToChain[L.getHeader()]; + if (!LoopBlockSet.count(*HeaderChain.begin())) + return nullptr; + + BlockFrequency BestExitEdgeFreq; + unsigned BestExitLoopDepth = 0; + MachineBasicBlock *ExitingBB = nullptr; + // If there are exits to outer loops, loop rotation can severely limit + // fallthrough opportunites unless it selects such an exit. Keep a set of + // blocks where rotating to exit with that block will reach an outer loop. + SmallPtrSet<MachineBasicBlock *, 4> BlocksExitingToOuterLoop; + + DEBUG(dbgs() << "Finding best loop exit for: " << getBlockName(L.getHeader()) + << "\n"); + for (MachineBasicBlock *MBB : L.getBlocks()) { + BlockChain &Chain = *BlockToChain[MBB]; + // Ensure that this block is at the end of a chain; otherwise it could be + // mid-way through an inner loop or a successor of an unanalyzable branch. + if (MBB != *std::prev(Chain.end())) + continue; + + // Now walk the successors. We need to establish whether this has a viable + // exiting successor and whether it has a viable non-exiting successor. + // We store the old exiting state and restore it if a viable looping + // successor isn't found. 
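findBestLoopTop above finishes by walking backwards through any straight line of single-predecessor, single-successor blocks so the whole run is hoisted above the header. A small sketch of that backward walk on a hypothetical line 5 -> 6 -> 7 feeding header 1 (block ids and the Info struct are invented for illustration):

#include <cstdio>
#include <map>

struct Info { int SinglePred; int SuccCount; }; // SinglePred < 0 means none/many

int main() {
  std::map<int, Info> Blocks = {{7, {6, 1}}, {6, {5, 1}}, {5, {1, 1}}, {1, {-1, 2}}};
  int Header = 1, Best = 7; // start from the initially chosen predecessor
  while (Blocks[Best].SinglePred >= 0 &&
         Blocks[Blocks[Best].SinglePred].SuccCount == 1 &&
         Blocks[Best].SinglePred != Header)
    Best = Blocks[Best].SinglePred;     // keep pulling the straight line up
  std::printf("final top: bb%d\n", Best); // prints: final top: bb5
}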
+ MachineBasicBlock *OldExitingBB = ExitingBB; + BlockFrequency OldBestExitEdgeFreq = BestExitEdgeFreq; + bool HasLoopingSucc = false; + for (MachineBasicBlock *Succ : MBB->successors()) { + if (Succ->isEHPad()) + continue; + if (Succ == MBB) + continue; + BlockChain &SuccChain = *BlockToChain[Succ]; + // Don't split chains, either this chain or the successor's chain. + if (&Chain == &SuccChain) { + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (chain conflict)\n"); + continue; + } + + auto SuccProb = MBPI->getEdgeProbability(MBB, Succ); + if (LoopBlockSet.count(Succ)) { + DEBUG(dbgs() << " looping: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " (" << SuccProb << ")\n"); + HasLoopingSucc = true; + continue; + } + + unsigned SuccLoopDepth = 0; + if (MachineLoop *ExitLoop = MLI->getLoopFor(Succ)) { + SuccLoopDepth = ExitLoop->getLoopDepth(); + if (ExitLoop->contains(&L)) + BlocksExitingToOuterLoop.insert(MBB); + } + + BlockFrequency ExitEdgeFreq = MBFI->getBlockFreq(MBB) * SuccProb; + DEBUG(dbgs() << " exiting: " << getBlockName(MBB) << " -> " + << getBlockName(Succ) << " [L:" << SuccLoopDepth << "] ("; + MBFI->printBlockFreq(dbgs(), ExitEdgeFreq) << ")\n"); + // Note that we bias this toward an existing layout successor to retain + // incoming order in the absence of better information. The exit must have + // a frequency higher than the current exit before we consider breaking + // the layout. + BranchProbability Bias(100 - ExitBlockBias, 100); + if (!ExitingBB || SuccLoopDepth > BestExitLoopDepth || + ExitEdgeFreq > BestExitEdgeFreq || + (MBB->isLayoutSuccessor(Succ) && + !(ExitEdgeFreq < BestExitEdgeFreq * Bias))) { + BestExitEdgeFreq = ExitEdgeFreq; + ExitingBB = MBB; + } + } + + if (!HasLoopingSucc) { + // Restore the old exiting state, no viable looping successor was found. + ExitingBB = OldExitingBB; + BestExitEdgeFreq = OldBestExitEdgeFreq; + continue; + } + } + // Without a candidate exiting block or with only a single block in the + // loop, just use the loop header to layout the loop. + if (!ExitingBB || L.getNumBlocks() == 1) + return nullptr; + + // Also, if we have exit blocks which lead to outer loops but didn't select + // one of them as the exiting block we are rotating toward, disable loop + // rotation altogether. + if (!BlocksExitingToOuterLoop.empty() && + !BlocksExitingToOuterLoop.count(ExitingBB)) + return nullptr; + + DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB) << "\n"); + return ExitingBB; +} + +/// \brief Attempt to rotate an exiting block to the bottom of the loop. +/// +/// Once we have built a chain, try to rotate it to line up the hot exit block +/// with fallthrough out of the loop if doing so doesn't introduce unnecessary +/// branches. For example, if the loop has fallthrough into its header and out +/// of its bottom already, don't rotate it. +void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain, + MachineBasicBlock *ExitingBB, + const BlockFilterSet &LoopBlockSet) { + if (!ExitingBB) + return; + + MachineBasicBlock *Top = *LoopChain.begin(); + bool ViableTopFallthrough = false; + for (MachineBasicBlock *Pred : Top->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + ViableTopFallthrough = true; + break; + } + } + + // If the header has viable fallthrough, check whether the current loop + // bottom is a viable exiting block. 
If so, bail out as rotating will + // introduce an unnecessary branch. + if (ViableTopFallthrough) { + MachineBasicBlock *Bottom = *std::prev(LoopChain.end()); + for (MachineBasicBlock *Succ : Bottom->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) + return; + } + } + + BlockChain::iterator ExitIt = + std::find(LoopChain.begin(), LoopChain.end(), ExitingBB); + if (ExitIt == LoopChain.end()) + return; + + std::rotate(LoopChain.begin(), std::next(ExitIt), LoopChain.end()); +} + +/// \brief Attempt to rotate a loop based on profile data to reduce branch cost. +/// +/// With profile data, we can determine the cost in terms of missed fall through +/// opportunities when rotating a loop chain and select the best rotation. +/// Basically, there are three kinds of cost to consider for each rotation: +/// 1. The possibly missed fall through edge (if it exists) from BB out of +/// the loop to the loop header. +/// 2. The possibly missed fall through edges (if they exist) from the loop +/// exits to BB out of the loop. +/// 3. The missed fall through edge (if it exists) from the last BB to the +/// first BB in the loop chain. +/// Therefore, the cost for a given rotation is the sum of costs listed above. +/// We select the best rotation with the smallest cost. +void MachineBlockPlacement::rotateLoopWithProfile( + BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) { + auto HeaderBB = L.getHeader(); + auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB); + auto RotationPos = LoopChain.end(); + + BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency(); + + // A utility lambda that scales up a block frequency by dividing it by a + // branch probability which is the reciprocal of the scale. + auto ScaleBlockFrequency = [](BlockFrequency Freq, + unsigned Scale) -> BlockFrequency { + if (Scale == 0) + return 0; + // Use operator / between BlockFrequency and BranchProbability to implement + // saturating multiplication. + return Freq / BranchProbability(1, Scale); + }; + + // Compute the cost of the missed fall-through edge to the loop header if the + // chain head is not the loop header. As we only consider natural loops with + // single header, this computation can be done only once. + BlockFrequency HeaderFallThroughCost(0); + for (auto *Pred : HeaderBB->predecessors()) { + BlockChain *PredChain = BlockToChain[Pred]; + if (!LoopBlockSet.count(Pred) && + (!PredChain || Pred == *std::prev(PredChain->end()))) { + auto EdgeFreq = + MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB); + auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost); + // If the predecessor has only an unconditional jump to the header, we + // need to consider the cost of this jump. + if (Pred->succ_size() == 1) + FallThruCost += ScaleBlockFrequency(EdgeFreq, JumpInstCost); + HeaderFallThroughCost = std::max(HeaderFallThroughCost, FallThruCost); + } + } + + // Here we collect all exit blocks in the loop, and for each exit we find out + // its hottest exit edge. For each loop rotation, we define the loop exit cost + // as the sum of frequencies of exit edges we collect here, excluding the exit + // edge from the tail of the loop chain. 
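+  // With plain integers standing in for block frequencies (hypothetical
+  // names; illustration only), the exit cost for a rotation whose chain tail
+  // is T reduces to:
+  //
+  //   uint64_t exitCost(const std::vector<std::pair<Block *, uint64_t>> &Exits,
+  //                     const Block *T) {
+  //     uint64_t Cost = 0;
+  //     for (const auto &E : Exits)
+  //       if (E.first != T)    // the tail's exit becomes a fallthrough: free
+  //         Cost += E.second;  // every other hot exit edge costs its frequency
+  //     return Cost;
+  //   }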
+ SmallVector<std::pair<MachineBasicBlock *, BlockFrequency>, 4> ExitsWithFreq; + for (auto BB : LoopChain) { + auto LargestExitEdgeProb = BranchProbability::getZero(); + for (auto *Succ : BB->successors()) { + BlockChain *SuccChain = BlockToChain[Succ]; + if (!LoopBlockSet.count(Succ) && + (!SuccChain || Succ == *SuccChain->begin())) { + auto SuccProb = MBPI->getEdgeProbability(BB, Succ); + LargestExitEdgeProb = std::max(LargestExitEdgeProb, SuccProb); + } + } + if (LargestExitEdgeProb > BranchProbability::getZero()) { + auto ExitFreq = MBFI->getBlockFreq(BB) * LargestExitEdgeProb; + ExitsWithFreq.emplace_back(BB, ExitFreq); + } + } + + // In this loop we iterate every block in the loop chain and calculate the + // cost assuming the block is the head of the loop chain. When the loop ends, + // we should have found the best candidate as the loop chain's head. + for (auto Iter = LoopChain.begin(), TailIter = std::prev(LoopChain.end()), + EndIter = LoopChain.end(); + Iter != EndIter; Iter++, TailIter++) { + // TailIter is used to track the tail of the loop chain if the block we are + // checking (pointed by Iter) is the head of the chain. + if (TailIter == LoopChain.end()) + TailIter = LoopChain.begin(); + + auto TailBB = *TailIter; + + // Calculate the cost by putting this BB to the top. + BlockFrequency Cost = 0; + + // If the current BB is the loop header, we need to take into account the + // cost of the missed fall through edge from outside of the loop to the + // header. + if (Iter != HeaderIter) + Cost += HeaderFallThroughCost; + + // Collect the loop exit cost by summing up frequencies of all exit edges + // except the one from the chain tail. + for (auto &ExitWithFreq : ExitsWithFreq) + if (TailBB != ExitWithFreq.first) + Cost += ExitWithFreq.second; + + // The cost of breaking the once fall-through edge from the tail to the top + // of the loop chain. Here we need to consider three cases: + // 1. If the tail node has only one successor, then we will get an + // additional jmp instruction. So the cost here is (MisfetchCost + + // JumpInstCost) * tail node frequency. + // 2. If the tail node has two successors, then we may still get an + // additional jmp instruction if the layout successor after the loop + // chain is not its CFG successor. Note that the more frequently executed + // jmp instruction will be put ahead of the other one. Assume the + // frequency of those two branches are x and y, where x is the frequency + // of the edge to the chain head, then the cost will be + // (x * MisfetechCost + min(x, y) * JumpInstCost) * tail node frequency. + // 3. If the tail node has more than two successors (this rarely happens), + // we won't consider any additional cost. + if (TailBB->isSuccessor(*Iter)) { + auto TailBBFreq = MBFI->getBlockFreq(TailBB); + if (TailBB->succ_size() == 1) + Cost += ScaleBlockFrequency(TailBBFreq.getFrequency(), + MisfetchCost + JumpInstCost); + else if (TailBB->succ_size() == 2) { + auto TailToHeadProb = MBPI->getEdgeProbability(TailBB, *Iter); + auto TailToHeadFreq = TailBBFreq * TailToHeadProb; + auto ColderEdgeFreq = TailToHeadProb > BranchProbability(1, 2) + ? 
TailBBFreq * TailToHeadProb.getCompl() + : TailToHeadFreq; + Cost += ScaleBlockFrequency(TailToHeadFreq, MisfetchCost) + + ScaleBlockFrequency(ColderEdgeFreq, JumpInstCost); + } + } + + DEBUG(dbgs() << "The cost of loop rotation by making " << getBlockNum(*Iter) + << " to the top: " << Cost.getFrequency() << "\n"); + + if (Cost < SmallestRotationCost) { + SmallestRotationCost = Cost; + RotationPos = Iter; + } + } + + if (RotationPos != LoopChain.end()) { + DEBUG(dbgs() << "Rotate loop by making " << getBlockNum(*RotationPos) + << " to the top\n"); + std::rotate(LoopChain.begin(), RotationPos, LoopChain.end()); + } +} + +/// \brief Collect blocks in the given loop that are to be placed. +/// +/// When profile data is available, exclude cold blocks from the returned set; +/// otherwise, collect all blocks in the loop. +MachineBlockPlacement::BlockFilterSet +MachineBlockPlacement::collectLoopBlockSet(MachineFunction &F, MachineLoop &L) { + BlockFilterSet LoopBlockSet; + + // Filter cold blocks off from LoopBlockSet when profile data is available. + // Collect the sum of frequencies of incoming edges to the loop header from + // outside. If we treat the loop as a super block, this is the frequency of + // the loop. Then for each block in the loop, we calculate the ratio between + // its frequency and the frequency of the loop block. When it is too small, + // don't add it to the loop chain. If there are outer loops, then this block + // will be merged into the first outer loop chain for which this block is not + // cold anymore. This needs precise profile data and we only do this when + // profile data is available. + if (F.getFunction()->getEntryCount()) { + BlockFrequency LoopFreq(0); + for (auto LoopPred : L.getHeader()->predecessors()) + if (!L.contains(LoopPred)) + LoopFreq += MBFI->getBlockFreq(LoopPred) * + MBPI->getEdgeProbability(LoopPred, L.getHeader()); + + for (MachineBasicBlock *LoopBB : L.getBlocks()) { + auto Freq = MBFI->getBlockFreq(LoopBB).getFrequency(); + if (Freq == 0 || LoopFreq.getFrequency() / Freq > LoopToColdBlockRatio) + continue; + LoopBlockSet.insert(LoopBB); + } + } else + LoopBlockSet.insert(L.block_begin(), L.block_end()); + + return LoopBlockSet; +} + +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop *InnerLoop : L) + buildLoopChains(F, *InnerLoop); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + BlockFilterSet LoopBlockSet = collectLoopBlockSet(F, L); + + // Check if we have profile data for this function. If yes, we will rotate + // this loop by modeling costs more precisely which requires the profile data + // for better layout. + bool RotateLoopWithProfile = + PreciseRotationCost && F.getFunction()->getEntryCount(); + + // First check to see if there is an obviously preferable top block for the + // loop. This will default to the header, but may end up as one of the + // predecessors to the header if there is one which will result in strictly + // fewer branches in the loop body. + // When we use profile data to rotate the loop, this is unnecessary. 
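+  // With profile data, rotateLoopWithProfile above already evaluates every
+  // candidate chain head with a cost of roughly this shape (hypothetical
+  // names; illustration only), so a separate top-block heuristic would be
+  // redundant:
+  //
+  //   uint64_t rotationCost(const Block *Head, const Block *Tail) {
+  //     uint64_t Cost = 0;
+  //     if (Head != Header)
+  //       Cost += HeaderFallThroughCost;      // lost fallthrough into the loop
+  //     Cost += exitCostExcludingTail(Tail);  // lost fallthroughs out of it
+  //     Cost += tailToHeadBreakCost(Tail);    // extra jump from tail to head
+  //     return Cost;                          // the minimum-cost head wins
+  //   }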
+ MachineBasicBlock *LoopTop = + RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet); + + // If we selected just the header for the loop top, look for a potentially + // profitable exit block in the event that rotating the loop can eliminate + // branches by placing an exit edge at the bottom. + MachineBasicBlock *ExitingBB = nullptr; + if (!RotateLoopWithProfile && LoopTop == L.getHeader()) + ExitingBB = findBestLoopExit(F, L, LoopBlockSet); + + BlockChain &LoopChain = *BlockToChain[LoopTop]; + + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + assert(LoopChain.LoopPredecessors == 0); + UpdatedPreds.insert(&LoopChain); + + for (MachineBasicBlock *LoopBB : LoopBlockSet) { + BlockChain &Chain = *BlockToChain[LoopBB]; + if (!UpdatedPreds.insert(&Chain).second) + continue; + + assert(Chain.LoopPredecessors == 0); + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain || !LoopBlockSet.count(Pred)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + buildChain(LoopTop, LoopChain, BlockWorkList, &LoopBlockSet); + + if (RotateLoopWithProfile) + rotateLoopWithProfile(LoopChain, L, LoopBlockSet); + else + rotateLoop(LoopChain, ExitingBB, LoopBlockSet); + + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } + for (MachineBasicBlock *ChainBB : LoopChain) { + dbgs() << " ... " << getBlockName(ChainBB) << "\n"; + if (!LoopBlockSet.erase(ChainBB)) { + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block or vice versa. + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(ChainBB) << "\n"; + } + } + + if (!LoopBlockSet.empty()) { + BadLoop = true; + for (MachineBasicBlock *LoopBB : LoopBlockSet) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(LoopBB) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); + }); +} + +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + SmallVector<MachineOperand, 4> Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + // Also, merge any blocks which we cannot reason about and must preserve + // the exact fallthrough behavior for. 
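+    // Conceptually (hypothetical names, iterator bookkeeping omitted;
+    // illustration only) the pre-merge loop below is:
+    //
+    //   while (!hasAnalyzableBranch(BB) && canFallThrough(BB)) {
+    //     Block *Next = layoutSuccessor(BB);
+    //     mergeIntoChain(Chain, Next);  // keep the pair adjacent in layout
+    //     BB = Next;                    // and keep scanning forward
+    //   }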
+ for (;;) { + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond) || !FI->canFallThrough()) + break; + + MachineFunction::iterator NextFI = std::next(FI); + MachineBasicBlock *NextBB = &*NextFI; + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextFI != FE && "Can't fallthrough past the last block."); + DEBUG(dbgs() << "Pre-merging due to unanalyzable fallthrough: " + << getBlockName(BB) << " -> " << getBlockName(NextBB) + << "\n"); + Chain->merge(NextBB, nullptr); + FI = NextFI; + BB = NextBB; + } + } + + if (OutlineOptionalBranches) { + // Find the nearest common dominator of all of F's terminators. + MachineBasicBlock *Terminator = nullptr; + for (MachineBasicBlock &MBB : F) { + if (MBB.succ_size() == 0) { + if (Terminator == nullptr) + Terminator = &MBB; + else + Terminator = MDT->findNearestCommonDominator(Terminator, &MBB); + } + } + + // MBBs dominating this common dominator are unavoidable. + UnavoidableBlocks.clear(); + for (MachineBasicBlock &MBB : F) { + if (MDT->dominates(&MBB, Terminator)) { + UnavoidableBlocks.insert(&MBB); + } + } + } + + // Build any loop-based chains. + for (MachineLoop *L : *MLI) + buildLoopChains(F, *L); + + SmallVector<MachineBasicBlock *, 16> BlockWorkList; + + SmallPtrSet<BlockChain *, 4> UpdatedPreds; + for (MachineBasicBlock &MBB : F) { + BlockChain &Chain = *BlockToChain[&MBB]; + if (!UpdatedPreds.insert(&Chain).second) + continue; + + assert(Chain.LoopPredecessors == 0); + for (MachineBasicBlock *ChainBB : Chain) { + assert(BlockToChain[ChainBB] == &Chain); + for (MachineBasicBlock *Pred : ChainBB->predecessors()) { + if (BlockToChain[Pred] == &Chain) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*Chain.begin()); + } + + BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + +#ifndef NDEBUG + typedef SmallPtrSet<MachineBasicBlock *, 16> FunctionBlockSetType; +#endif + DEBUG({ + // Crash at the end so we get all of the debugging output first. + bool BadFunc = false; + FunctionBlockSetType FunctionBlockSet; + for (MachineBasicBlock &MBB : F) + FunctionBlockSet.insert(&MBB); + + for (MachineBasicBlock *ChainBB : FunctionChain) + if (!FunctionBlockSet.erase(ChainBB)) { + BadFunc = true; + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(ChainBB) << "\n"; + } + + if (!FunctionBlockSet.empty()) { + BadFunc = true; + for (MachineBasicBlock *RemainingBB : FunctionBlockSet) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(RemainingBB) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); + for (MachineBasicBlock *ChainBB : FunctionChain) { + DEBUG(dbgs() << (ChainBB == *FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(ChainBB) << "\n"); + if (InsertPos != MachineFunction::iterator(ChainBB)) + F.splice(InsertPos, ChainBB); + else + ++InsertPos; + + // Update the terminator of the previous block. 
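+    // After splicing, PrevBB's branches may no longer match the new layout: a
+    // former fallthrough may now need an explicit jump, or vice versa. The
+    // code below re-analyzes PrevBB and, for a two-way branch, prefers to
+    // branch to the hotter successor first. Roughly (hypothetical names;
+    // illustration only):
+    //
+    //   if (prob(PrevBB, FBB) > prob(PrevBB, TBB) && canReverse(Cond)) {
+    //     removeBranch(PrevBB);
+    //     insertBranch(PrevBB, /*taken=*/FBB, /*fallthrough=*/TBB,
+    //                  reversedCond);
+    //   }
+    //   updateTerminator(PrevBB);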
+ if (ChainBB == *FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = &*std::prev(MachineFunction::iterator(ChainBB)); + + // FIXME: It would be awesome of updateTerminator would just return rather + // than assert when the branch cannot be analyzed in order to remove this + // boiler plate. + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". + // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == ChainBB)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = nullptr; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = nullptr; + } + } + + // If PrevBB has a two-way branch, try to re-order the branches + // such that we branch to the successor with higher probability first. + if (TBB && !Cond.empty() && FBB && + MBPI->getEdgeProbability(PrevBB, FBB) > + MBPI->getEdgeProbability(PrevBB, TBB) && + !TII->ReverseBranchCondition(Cond)) { + DEBUG(dbgs() << "Reverse order of the two branches: " + << getBlockName(PrevBB) << "\n"); + DEBUG(dbgs() << " Edge probability: " + << MBPI->getEdgeProbability(PrevBB, FBB) << " vs " + << MBPI->getEdgeProbability(PrevBB, TBB) << "\n"); + DebugLoc dl; // FIXME: this is nowhere + TII->RemoveBranch(*PrevBB); + TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; + } + if (needUpdateBr) + PrevBB->updateTerminator(); + } + } + + // Fixup the last block. + Cond.clear(); + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); + + // Walk through the backedges of the function now that we have fully laid out + // the basic blocks and align the destination of each backedge. We don't rely + // exclusively on the loop info here so that we can align backedges in + // unnatural CFGs and backedges that were introduced purely because of the + // loop rotations done during this layout pass. + // FIXME: Use Function::optForSize(). + if (F.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + return; + if (FunctionChain.begin() == FunctionChain.end()) + return; // Empty chain. + + const BranchProbability ColdProb(1, 5); // 20% + BlockFrequency EntryFreq = MBFI->getBlockFreq(&F.front()); + BlockFrequency WeightedEntryFreq = EntryFreq * ColdProb; + for (MachineBasicBlock *ChainBB : FunctionChain) { + if (ChainBB == *FunctionChain.begin()) + continue; + + // Don't align non-looping basic blocks. These are unlikely to execute + // enough times to matter in practice. Note that we'll still handle + // unnatural CFGs inside of a natural outer loop (the common case) and + // rotated loops. + MachineLoop *L = MLI->getLoopFor(ChainBB); + if (!L) + continue; + + if (AlignAllLoops) { + ChainBB->setAlignment(AlignAllLoops); + continue; + } + + unsigned Align = TLI->getPrefLoopAlignment(L); + if (!Align) + continue; // Don't care about loop alignment. 
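+    // The remaining checks gate alignment on a 20% coldness threshold
+    // (ColdProb). With plain integers standing in for frequencies
+    // (hypothetical names; illustration only), a block is worth aligning when:
+    //
+    //   bool worthAligning(uint64_t Freq, uint64_t EntryFreq,
+    //                      uint64_t HeaderFreq, uint64_t LayoutEdgeFreq,
+    //                      bool LayoutPredFallsThrough) {
+    //     if (Freq * 5 < EntryFreq)    return false; // cold vs. function entry
+    //     if (Freq * 5 < HeaderFreq)   return false; // cold vs. its loop header
+    //     if (!LayoutPredFallsThrough) return true;  // only reached by jumps
+    //     return LayoutEdgeFreq * 5 <= Freq;         // layout edge is cold
+    //   }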
+ + // If the block is cold relative to the function entry don't waste space + // aligning it. + BlockFrequency Freq = MBFI->getBlockFreq(ChainBB); + if (Freq < WeightedEntryFreq) + continue; + + // If the block is cold relative to its loop header, don't align it + // regardless of what edges into the block exist. + MachineBasicBlock *LoopHeader = L->getHeader(); + BlockFrequency LoopHeaderFreq = MBFI->getBlockFreq(LoopHeader); + if (Freq < (LoopHeaderFreq * ColdProb)) + continue; + + // Check for the existence of a non-layout predecessor which would benefit + // from aligning this block. + MachineBasicBlock *LayoutPred = + &*std::prev(MachineFunction::iterator(ChainBB)); + + // Force alignment if all the predecessors are jumps. We already checked + // that the block isn't cold above. + if (!LayoutPred->isSuccessor(ChainBB)) { + ChainBB->setAlignment(Align); + continue; + } + + // Align this block if the layout predecessor's edge into this block is + // cold relative to the block. When this is true, other predecessors make up + // all of the hot entries into the block and thus alignment is likely to be + // important. + BranchProbability LayoutProb = + MBPI->getEdgeProbability(LayoutPred, ChainBB); + BlockFrequency LayoutEdgeFreq = MBFI->getBlockFreq(LayoutPred) * LayoutProb; + if (LayoutEdgeFreq <= (Freq * ColdProb)) + ChainBB->setAlignment(Align); + } +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (std::next(F.begin()) == F.end()) + return false; + + if (skipOptnoneFunction(*F.getFunction())) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + TII = F.getSubtarget().getInstrInfo(); + TLI = F.getSubtarget().getTargetLowering(); + MDT = &getAnalysis<MachineDominatorTree>(); + assert(BlockToChain.empty()); + + buildCFGChains(F); + + BlockToChain.clear(); + ChainAllocator.DestroyAll(); + + if (AlignAllBlock) + // Align all of the blocks in the function to a specific alignment. + for (MachineBasicBlock &MBB : F) + MBB.setAlignment(AlignAllBlock); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absence of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. 
+ const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; +} + +char MachineBlockPlacementStats::ID = 0; +char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (std::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + + for (MachineBasicBlock &MBB : F) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + Statistic &NumBranches = + (MBB.succ_size() > 1) ? NumCondBranches : NumUncondBranches; + Statistic &BranchTakenFreq = + (MBB.succ_size() > 1) ? CondBranchTakenFreq : UncondBranchTakenFreq; + for (MachineBasicBlock *Succ : MBB.successors()) { + // Skip if this successor is a fallthrough. + if (MBB.isLayoutSuccessor(Succ)) + continue; + + BlockFrequency EdgeFreq = + BlockFreq * MBPI->getEdgeProbability(&MBB, Succ); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp new file mode 100644 index 0000000..cf6d401 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -0,0 +1,83 @@ +//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis uses probability info stored in Machine Basic Blocks. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) +INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", + "Machine Branch Probability Analysis", false, true) + +char MachineBranchProbabilityInfo::ID = 0; + +void MachineBranchProbabilityInfo::anchor() { } + +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, + MachineBasicBlock::const_succ_iterator Dst) const { + return Src->getSuccProbability(Dst); +} + +BranchProbability MachineBranchProbabilityInfo::getEdgeProbability( + const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const { + // This is a linear search. Try to use the const_succ_iterator version when + // possible. + return getEdgeProbability(Src, + std::find(Src->succ_begin(), Src->succ_end(), Dst)); +} + +bool +MachineBranchProbabilityInfo::isEdgeHot(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + // Hot probability is at least 4/5 = 80% + static BranchProbability HotProb(4, 5); + return getEdgeProbability(Src, Dst) > HotProb; +} + +MachineBasicBlock * +MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { + auto MaxProb = BranchProbability::getZero(); + MachineBasicBlock *MaxSucc = nullptr; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + auto Prob = getEdgeProbability(MBB, I); + if (Prob > MaxProb) { + MaxProb = Prob; + MaxSucc = *I; + } + } + + static BranchProbability HotProb(4, 5); + if (getEdgeProbability(MBB, MaxSucc) >= HotProb) + return MaxSucc; + + return nullptr; +} + +raw_ostream &MachineBranchProbabilityInfo::printEdgeProbability( + raw_ostream &OS, const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + + const BranchProbability Prob = getEdgeProbability(Src, Dst); + OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber() + << " probability is " << Prob + << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n"); + + return OS; +} diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp new file mode 100644 index 0000000..aad376c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -0,0 +1,711 @@ +//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs global common subexpression elimination on machine +// instructions using a scoped hash table based value numbering scheme. It +// must be run while the machine function is still in SSA form. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "machine-cse" + +STATISTIC(NumCoalesces, "Number of copies coalesced"); +STATISTIC(NumCSEs, "Number of common subexpression eliminated"); +STATISTIC(NumPhysCSEs, + "Number of physreg referencing common subexpr eliminated"); +STATISTIC(NumCrossBBCSEs, + "Number of cross-MBB physreg referencing CS eliminated"); +STATISTIC(NumCommutes, "Number of copies coalesced after commuting"); + +namespace { + class MachineCSE : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + AliasAnalysis *AA; + MachineDominatorTree *DT; + MachineRegisterInfo *MRI; + public: + static char ID; // Pass identification + MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(0), CurrVN(0) { + initializeMachineCSEPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + + void releaseMemory() override { + ScopeMap.clear(); + Exps.clear(); + } + + private: + unsigned LookAheadLimit; + typedef RecyclingAllocator<BumpPtrAllocator, + ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy; + typedef ScopedHashTable<MachineInstr*, unsigned, + MachineInstrExpressionTrait, AllocatorTy> ScopedHTType; + typedef ScopedHTType::ScopeTy ScopeType; + DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap; + ScopedHTType VNT; + SmallVector<MachineInstr*, 64> Exps; + unsigned CurrVN; + + bool PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB); + bool isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const; + bool hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs, + SmallVectorImpl<unsigned> &PhysDefs, + bool &PhysUseDef) const; + bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs, + SmallVectorImpl<unsigned> &PhysDefs, + bool &NonLocal) const; + bool isCSECandidate(MachineInstr *MI); + bool isProfitableToCSE(unsigned CSReg, unsigned Reg, + MachineInstr *CSMI, MachineInstr *MI); + void EnterScope(MachineBasicBlock *MBB); + void ExitScope(MachineBasicBlock *MBB); + bool ProcessBlock(MachineBasicBlock *MBB); + void ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren); + bool PerformCSE(MachineDomTreeNode *Node); + }; +} // end anonymous namespace + +char MachineCSE::ID = 0; +char &llvm::MachineCSEID = MachineCSE::ID; +INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) 
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MachineCSE, "machine-cse", + "Machine Common Subexpression Elimination", false, false) + +/// The source register of a COPY machine instruction can be propagated to all +/// its users, and this propagation could increase the probability of finding +/// common subexpressions. If the COPY has only one user, the COPY itself can +/// be removed. +bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI, + MachineBasicBlock *MBB) { + bool Changed = false; + for (MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg); + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (!DefMI->isCopy()) + continue; + unsigned SrcReg = DefMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + continue; + if (DefMI->getOperand(0).getSubReg()) + continue; + // FIXME: We should trivially coalesce subregister copies to expose CSE + // opportunities on instructions with truncated operands (see + // cse-add-with-overflow.ll). This can be done here as follows: + // if (SrcSubReg) + // RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC, + // SrcSubReg); + // MO.substVirtReg(SrcReg, SrcSubReg, *TRI); + // + // The 2-addr pass has been updated to handle coalesced subregs. However, + // some machine-specific code still can't handle it. + // To handle it properly we also need a way find a constrained subregister + // class given a super-reg class and subreg index. + if (DefMI->getOperand(1).getSubReg()) + continue; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (!MRI->constrainRegClass(SrcReg, RC)) + continue; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + // Propagate SrcReg of copies to MI. + MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); + // Coalesce single use copies. + if (OnlyOneUse) { + DefMI->eraseFromParent(); + ++NumCoalesces; + } + Changed = true; + } + + return Changed; +} + +bool +MachineCSE::isPhysDefTriviallyDead(unsigned Reg, + MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator E) const { + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I->isDebugValue()) + ++I; + + if (I == E) + // Reached end of block, register is obviously dead. + return true; + + bool SeenDef = false; + for (const MachineOperand &MO : I->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) + SeenDef = true; + if (!MO.isReg() || !MO.getReg()) + continue; + if (!TRI->regsOverlap(MO.getReg(), Reg)) + continue; + if (MO.isUse()) + // Found a use! + return false; + SeenDef = true; + } + if (SeenDef) + // See a def of Reg (or an alias) before encountering any use, it's + // trivially dead. + return true; + + --LookAheadLeft; + ++I; + } + return false; +} + +/// hasLivePhysRegDefUses - Return true if the specified instruction read/write +/// physical registers (except for dead defs of physical registers). It also +/// returns the physical register def by reference if it's the only one and the +/// instruction does not uses a physical register. 
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, + const MachineBasicBlock *MBB, + SmallSet<unsigned,8> &PhysRefs, + SmallVectorImpl<unsigned> &PhysDefs, + bool &PhysUseDef) const{ + // First, add all uses to PhysRefs. + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Reading constant physregs is ok. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + } + + // Next, collect all defs into PhysDefs. If any is already in PhysRefs + // (which currently contains only uses), set the PhysUseDef flag. + PhysUseDef = false; + MachineBasicBlock::const_iterator I = MI; I = std::next(I); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + // Check against PhysRefs even if the def is "dead". + if (PhysRefs.count(Reg)) + PhysUseDef = true; + // If the def is dead, it's ok. But the def may not marked "dead". That's + // common since this pass is run before livevariables. We can scan + // forward a few instructions and check if it is obviously dead. + if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end())) + PhysDefs.push_back(Reg); + } + + // Finally, add all defs to PhysRefs as well. + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) + for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI) + PhysRefs.insert(*AI); + + return !PhysRefs.empty(); +} + +bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, + SmallSet<unsigned,8> &PhysRefs, + SmallVectorImpl<unsigned> &PhysDefs, + bool &NonLocal) const { + // For now conservatively returns false if the common subexpression is + // not in the same basic block as the given instruction. The only exception + // is if the common subexpression is in the sole predecessor block. + const MachineBasicBlock *MBB = MI->getParent(); + const MachineBasicBlock *CSMBB = CSMI->getParent(); + + bool CrossMBB = false; + if (CSMBB != MBB) { + if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB) + return false; + + for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) { + if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i])) + // Avoid extending live range of physical registers if they are + //allocatable or reserved. + return false; + } + CrossMBB = true; + } + MachineBasicBlock::const_iterator I = CSMI; I = std::next(I); + MachineBasicBlock::const_iterator E = MI; + MachineBasicBlock::const_iterator EE = CSMBB->end(); + unsigned LookAheadLeft = LookAheadLimit; + while (LookAheadLeft) { + // Skip over dbg_value's. + while (I != E && I != EE && I->isDebugValue()) + ++I; + + if (I == EE) { + assert(CrossMBB && "Reaching end-of-MBB without finding MI?"); + (void)CrossMBB; + CrossMBB = false; + NonLocal = true; + I = MBB->begin(); + EE = MBB->end(); + continue; + } + + if (I == E) + return true; + + for (const MachineOperand &MO : I->operands()) { + // RegMasks go on instructions like calls that clobber lots of physregs. + // Don't attempt to CSE across such an instruction. 
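+      // A regmask operand conservatively clobbers every physical register it
+      // does not explicitly preserve, so any physreg in PhysRefs has to be
+      // assumed dead past such an instruction. In sketch form (hypothetical
+      // names; illustration only), the scan from CSMI to MI is:
+      //
+      //   for (const Instr &X : instructionsBetween(CSMI, MI))
+      //     if (clobbersAnyOf(X, PhysRefs)) // regmask or tracked physreg def
+      //       return false;                 // the CSE'd value does not reach MI
+      //   return true;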
+ if (MO.isRegMask()) + return false; + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(MOReg)) + continue; + if (PhysRefs.count(MOReg)) + return false; + } + + --LookAheadLeft; + ++I; + } + + return false; +} + +bool MachineCSE::isCSECandidate(MachineInstr *MI) { + if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() || + MI->isInlineAsm() || MI->isDebugValue()) + return false; + + // Ignore copies. + if (MI->isCopyLike()) + return false; + + // Ignore stuff that we obviously can't move. + if (MI->mayStore() || MI->isCall() || MI->isTerminator() || + MI->hasUnmodeledSideEffects()) + return false; + + if (MI->mayLoad()) { + // Okay, this instruction does a load. As a refinement, we allow the target + // to decide whether the loaded value is actually a constant. If so, we can + // actually use it as a load. + if (!MI->isInvariantLoad(AA)) + // FIXME: we should be able to hoist loads with no other side effects if + // there are no other instructions which can change memory in this loop. + // This is a trivial form of alias analysis. + return false; + } + return true; +} + +/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a +/// common expression that defines Reg. +bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg, + MachineInstr *CSMI, MachineInstr *MI) { + // FIXME: Heuristics that works around the lack the live range splitting. + + // If CSReg is used at all uses of Reg, CSE should not increase register + // pressure of CSReg. + bool MayIncreasePressure = true; + if (TargetRegisterInfo::isVirtualRegister(CSReg) && + TargetRegisterInfo::isVirtualRegister(Reg)) { + MayIncreasePressure = false; + SmallPtrSet<MachineInstr*, 8> CSUses; + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + CSUses.insert(&MI); + } + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + if (!CSUses.count(&MI)) { + MayIncreasePressure = true; + break; + } + } + } + if (!MayIncreasePressure) return true; + + // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in + // an immediate predecessor. We don't want to increase register pressure and + // end up causing other computation to be spilled. + if (TII->isAsCheapAsAMove(MI)) { + MachineBasicBlock *CSBB = CSMI->getParent(); + MachineBasicBlock *BB = MI->getParent(); + if (CSBB != BB && !CSBB->isSuccessor(BB)) + return false; + } + + // Heuristics #2: If the expression doesn't not use a vr and the only use + // of the redundant computation are copies, do not cse. + bool HasVRegUse = false; + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isUse() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + HasVRegUse = true; + break; + } + } + if (!HasVRegUse) { + bool HasNonCopyUse = false; + for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { + // Ignore copies. + if (!MI.isCopyLike()) { + HasNonCopyUse = true; + break; + } + } + if (!HasNonCopyUse) + return false; + } + + // Heuristics #3: If the common subexpression is used by PHIs, do not reuse + // it unless the defined value is already used in the BB of the new use. 
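+  // In sketch form (hypothetical names; illustration only): gather the blocks
+  // that already use CSReg; if any user is a PHI, only CSE when MI's block is
+  // among them, so the reused value's live range is not stretched into a
+  // block where it was not already live.
+  //
+  //   std::set<const Block *> UserBlocks;
+  //   bool HasPHIUse = false;
+  //   for (const Instr &U : usersOf(CSReg)) {
+  //     HasPHIUse |= isPHI(U);
+  //     UserBlocks.insert(parentBlock(U));
+  //   }
+  //   return !HasPHIUse || UserBlocks.count(parentBlock(MI));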
+ bool HasPHI = false; + SmallPtrSet<MachineBasicBlock*, 4> CSBBs; + for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) { + HasPHI |= MI.isPHI(); + CSBBs.insert(MI.getParent()); + } + + if (!HasPHI) + return true; + return CSBBs.count(MI->getParent()); +} + +void MachineCSE::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + ScopeType *Scope = new ScopeType(VNT); + ScopeMap[MBB] = Scope; +} + +void MachineCSE::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB); + assert(SI != ScopeMap.end()); + delete SI->second; + ScopeMap.erase(SI); +} + +bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { + bool Changed = false; + + SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs; + SmallVector<unsigned, 2> ImplicitDefsToUpdate; + SmallVector<unsigned, 2> ImplicitDefs; + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (!isCSECandidate(MI)) + continue; + + bool FoundCSE = VNT.count(MI); + if (!FoundCSE) { + // Using trivial copy propagation to find more CSE opportunities. + if (PerformTrivialCopyPropagation(MI, MBB)) { + Changed = true; + + // After coalescing MI itself may become a copy. + if (MI->isCopyLike()) + continue; + + // Try again to see if CSE is possible. + FoundCSE = VNT.count(MI); + } + } + + // Commute commutable instructions. + bool Commuted = false; + if (!FoundCSE && MI->isCommutable()) { + MachineInstr *NewMI = TII->commuteInstruction(MI); + if (NewMI) { + Commuted = true; + FoundCSE = VNT.count(NewMI); + if (NewMI != MI) { + // New instruction. It doesn't need to be kept. + NewMI->eraseFromParent(); + Changed = true; + } else if (!FoundCSE) + // MI was changed but it didn't help, commute it back! + (void)TII->commuteInstruction(MI); + } + } + + // If the instruction defines physical registers and the values *may* be + // used, then it's not safe to replace it with a common subexpression. + // It's also not safe if the instruction uses physical registers. + bool CrossMBBPhysDef = false; + SmallSet<unsigned, 8> PhysRefs; + SmallVector<unsigned, 2> PhysDefs; + bool PhysUseDef = false; + if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs, + PhysDefs, PhysUseDef)) { + FoundCSE = false; + + // ... Unless the CS is local or is in the sole predecessor block + // and it also defines the physical register which is not clobbered + // in between and the physical register uses were not clobbered. + // This can never be the case if the instruction both uses and + // defines the same physical register, which was detected above. + if (!PhysUseDef) { + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef)) + FoundCSE = true; + } + } + + if (!FoundCSE) { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + continue; + } + + // Found a common subexpression, eliminate it. + unsigned CSVN = VNT.lookup(MI); + MachineInstr *CSMI = Exps[CSVN]; + DEBUG(dbgs() << "Examining: " << *MI); + DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI); + + // Check if it's profitable to perform this CSE. 
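+    // The loop below walks MI's defs in parallel with CSMI's defs (both have
+    // the same operand layout) and records the (OldReg, NewReg) pairs to
+    // rewrite. Roughly (hypothetical names; illustration only):
+    //
+    //   for (unsigned i = 0; i != MI->getNumOperands() && NumDefs; ++i) {
+    //     if (!isRegisterDef(MI, i)) continue;
+    //     Reg Old = defReg(MI, i), New = defReg(CSMI, i);
+    //     if (Old == New) { --NumDefs; continue; }
+    //     if (!profitable(New, Old) ||
+    //         !classConstrainable(New, Old)) { DoCSE = false; break; }
+    //     Pairs.emplace_back(Old, New);
+    //     --NumDefs;
+    //   }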
+ bool DoCSE = true; + unsigned NumDefs = MI->getDesc().getNumDefs() + + MI->getDesc().getNumImplicitDefs(); + + for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned OldReg = MO.getReg(); + unsigned NewReg = CSMI->getOperand(i).getReg(); + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead()) + ImplicitDefsToUpdate.push_back(i); + + // Keep track of implicit defs of CSMI and MI, to clear possibly + // made-redundant kill flags. + if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg) + ImplicitDefs.push_back(OldReg); + + if (OldReg == NewReg) { + --NumDefs; + continue; + } + + assert(TargetRegisterInfo::isVirtualRegister(OldReg) && + TargetRegisterInfo::isVirtualRegister(NewReg) && + "Do not CSE physical register defs!"); + + if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) { + DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); + DoCSE = false; + break; + } + + // Don't perform CSE if the result of the old instruction cannot exist + // within the register class of the new instruction. + const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg); + if (!MRI->constrainRegClass(NewReg, OldRC)) { + DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n"); + DoCSE = false; + break; + } + + CSEPairs.push_back(std::make_pair(OldReg, NewReg)); + --NumDefs; + } + + // Actually perform the elimination. + if (DoCSE) { + for (std::pair<unsigned, unsigned> &CSEPair : CSEPairs) { + unsigned OldReg = CSEPair.first; + unsigned NewReg = CSEPair.second; + // OldReg may have been unused but is used now, clear the Dead flag + MachineInstr *Def = MRI->getUniqueVRegDef(NewReg); + assert(Def != nullptr && "CSEd register has no unique definition?"); + Def->clearRegisterDeads(NewReg); + // Replace with NewReg and clear kill flags which may be wrong now. + MRI->replaceRegWith(OldReg, NewReg); + MRI->clearKillFlags(NewReg); + } + + // Go through implicit defs of CSMI and MI, if a def is not dead at MI, + // we should make sure it is not dead at CSMI. + for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate) + CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false); + + // Go through implicit defs of CSMI and MI, and clear the kill flags on + // their uses in all the instructions between CSMI and MI. + // We might have made some of the kill flags redundant, consider: + // subs ... %NZCV<imp-def> <- CSMI + // csinc ... %NZCV<imp-use,kill> <- this kill flag isn't valid anymore + // subs ... %NZCV<imp-def> <- MI, to be eliminated + // csinc ... %NZCV<imp-use,kill> + // Since we eliminated MI, and reused a register imp-def'd by CSMI + // (here %NZCV), that register, if it was killed before MI, should have + // that kill flag removed, because it's lifetime was extended. + if (CSMI->getParent() == MI->getParent()) { + for (MachineBasicBlock::iterator II = CSMI, IE = MI; II != IE; ++II) + for (auto ImplicitDef : ImplicitDefs) + if (MachineOperand *MO = II->findRegisterUseOperand( + ImplicitDef, /*isKill=*/true, TRI)) + MO->setIsKill(false); + } else { + // If the instructions aren't in the same BB, bail out and clear the + // kill flag on all uses of the imp-def'd register. 
+ for (auto ImplicitDef : ImplicitDefs) + MRI->clearKillFlags(ImplicitDef); + } + + if (CrossMBBPhysDef) { + // Add physical register defs now coming in from a predecessor to MBB + // livein list. + while (!PhysDefs.empty()) { + unsigned LiveIn = PhysDefs.pop_back_val(); + if (!MBB->isLiveIn(LiveIn)) + MBB->addLiveIn(LiveIn); + } + ++NumCrossBBCSEs; + } + + MI->eraseFromParent(); + ++NumCSEs; + if (!PhysRefs.empty()) + ++NumPhysCSEs; + if (Commuted) + ++NumCommutes; + Changed = true; + } else { + VNT.insert(MI, CurrVN++); + Exps.push_back(MI); + } + CSEPairs.clear(); + ImplicitDefsToUpdate.clear(); + ImplicitDefs.clear(); + } + + return Changed; +} + +/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given +/// dominator tree node if its a leaf or all of its children are done. Walk +/// up the dominator tree to destroy ancestors which are now done. +void +MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) { + if (OpenChildren[Node]) + return; + + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. + while (MachineDomTreeNode *Parent = Node->getIDom()) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) { + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + CurrVN = 0; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(Node); + do { + Node = WorkList.pop_back_val(); + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + OpenChildren[Node] = Children.size(); + for (MachineDomTreeNode *Child : Children) + WorkList.push_back(Child); + } while (!WorkList.empty()); + + // Now perform CSE. + bool Changed = false; + for (MachineDomTreeNode *Node : Scopes) { + MachineBasicBlock *MBB = Node->getBlock(); + EnterScope(MBB); + Changed |= ProcessBlock(MBB); + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren); + } + + return Changed; +} + +bool MachineCSE::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + DT = &getAnalysis<MachineDominatorTree>(); + LookAheadLimit = TII->getMachineCSELookAheadLimit(); + return PerformCSE(DT->getRootNode()); +} diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp new file mode 100644 index 0000000..fa43c4d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp @@ -0,0 +1,468 @@ +//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The machine combiner pass uses machine trace metrics to ensure the combined +// instructions does not lengthen the critical path or the resource depth. 
+//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "machine-combiner" + +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +STATISTIC(NumInstCombined, "Number of machineinst combined"); + +namespace { +class MachineCombiner : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MCSchedModel SchedModel; + MachineRegisterInfo *MRI; + MachineTraceMetrics *Traces; + MachineTraceMetrics::Ensemble *MinInstr; + + TargetSchedModel TSchedModel; + + /// True if optimizing for code size. + bool OptSize; + +public: + static char ID; + MachineCombiner() : MachineFunctionPass(ID) { + initializeMachineCombinerPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + const char *getPassName() const override { return "Machine InstCombiner"; } + +private: + bool doSubstitute(unsigned NewSize, unsigned OldSize); + bool combineInstructions(MachineBasicBlock *); + MachineInstr *getOperandDef(const MachineOperand &MO); + unsigned getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace); + unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace); + bool + improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineCombinerPattern Pattern); + bool preservesResourceLen(MachineBasicBlock *MBB, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs); + void instr2instrSC(SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC); +}; +} + +char MachineCombiner::ID = 0; +char &llvm::MachineCombinerID = MachineCombiner::ID; + +INITIALIZE_PASS_BEGIN(MachineCombiner, "machine-combiner", + "Machine InstCombiner", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics) +INITIALIZE_PASS_END(MachineCombiner, "machine-combiner", "Machine InstCombiner", + false, false) + +void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<MachineTraceMetrics>(); + AU.addPreserved<MachineTraceMetrics>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) { + MachineInstr *DefInstr = nullptr; + // We need a virtual register definition. 
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + DefInstr = MRI->getUniqueVRegDef(MO.getReg()); + // PHI's have no depth etc. + if (DefInstr && DefInstr->isPHI()) + DefInstr = nullptr; + return DefInstr; +} + +/// Computes depth of instructions in vector \InsInstr. +/// +/// \param InsInstrs is a vector of machine instructions +/// \param InstrIdxForVirtReg is a dense map of virtual register to index +/// of defining machine instruction in \p InsInstrs +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Depth of last instruction in \InsInstrs ("NewRoot") +unsigned +MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineTraceMetrics::Trace BlockTrace) { + SmallVector<unsigned, 16> InstrDepth; + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + + // For each instruction in the new sequence compute the depth based on the + // operands. Use the trace information when possible. For new operands which + // are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth + for (auto *InstrPtr : InsInstrs) { // for each Use + unsigned IDepth = 0; + DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";); + for (const MachineOperand &MO : InstrPtr->operands()) { + // Check for virtual register operand. + if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isUse()) + continue; + unsigned DepthOp = 0; + unsigned LatencyOp = 0; + DenseMap<unsigned, unsigned>::iterator II = + InstrIdxForVirtReg.find(MO.getReg()); + if (II != InstrIdxForVirtReg.end()) { + // Operand is new virtual register not in trace + assert(II->second < InstrDepth.size() && "Bad Index"); + MachineInstr *DefInstr = InsInstrs[II->second]; + assert(DefInstr && + "There must be a definition for a new virtual register"); + DepthOp = InstrDepth[II->second]; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } else { + MachineInstr *DefInstr = getOperandDef(MO); + if (DefInstr) { + DepthOp = BlockTrace.getInstrCycles(DefInstr).Depth; + LatencyOp = TSchedModel.computeOperandLatency( + DefInstr, DefInstr->findRegisterDefOperandIdx(MO.getReg()), + InstrPtr, InstrPtr->findRegisterUseOperandIdx(MO.getReg())); + } + } + IDepth = std::max(IDepth, DepthOp + LatencyOp); + } + InstrDepth.push_back(IDepth); + } + unsigned NewRootIdx = InsInstrs.size() - 1; + return InstrDepth[NewRootIdx]; +} + +/// Computes instruction latency as max of latency of defined operands. +/// +/// \param Root is a machine instruction that could be replaced by NewRoot. +/// It is used to compute a more accurate latency information for NewRoot in +/// case there is a dependent instruction in the same trace (\p BlockTrace) +/// \param NewRoot is the instruction for which the latency is computed +/// \param BlockTrace is a trace of machine instructions +/// +/// \returns Latency of \p NewRoot +unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot, + MachineTraceMetrics::Trace BlockTrace) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + + // Check each definition in NewRoot and compute the latency + unsigned NewRootLatency = 0; + + for (const MachineOperand &MO : NewRoot->operands()) { + // Check for virtual register operand. 
+ if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))) + continue; + if (!MO.isDef()) + continue; + // Get the first instruction that uses MO + MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(MO.getReg()); + RI++; + MachineInstr *UseMO = RI->getParent(); + unsigned LatencyOp = 0; + if (UseMO && BlockTrace.isDepInTrace(Root, UseMO)) { + LatencyOp = TSchedModel.computeOperandLatency( + NewRoot, NewRoot->findRegisterDefOperandIdx(MO.getReg()), UseMO, + UseMO->findRegisterUseOperandIdx(MO.getReg())); + } else { + LatencyOp = TSchedModel.computeInstrLatency(NewRoot); + } + NewRootLatency = std::max(NewRootLatency, LatencyOp); + } + return NewRootLatency; +} + +/// The combiner's goal may differ based on which pattern it is attempting +/// to optimize. +enum class CombinerObjective { + MustReduceDepth, // The data dependency chain must be improved. + Default // The critical path must not be lengthened. +}; + +static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { + // TODO: If C++ ever gets a real enum class, make this part of the + // MachineCombinerPattern class. + switch (P) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_BY: + case MachineCombinerPattern::REASSOC_XA_YB: + return CombinerObjective::MustReduceDepth; + default: + return CombinerObjective::Default; + } +} + +/// The DAGCombine code sequence ends in MI (Machine Instruction) Root. +/// The new code sequence ends in MI NewRoot. A necessary condition for the new +/// sequence to replace the old sequence is that it cannot lengthen the critical +/// path. The definition of "improve" may be restricted by specifying that the +/// new path improves the data dependency chain (MustReduceDepth). +bool MachineCombiner::improvesCriticalPathLen( + MachineBasicBlock *MBB, MachineInstr *Root, + MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, + MachineCombinerPattern Pattern) { + assert(TSchedModel.hasInstrSchedModelOrItineraries() && + "Missing machine model\n"); + // NewRoot is the last instruction in the \p InsInstrs vector. + unsigned NewRootIdx = InsInstrs.size() - 1; + MachineInstr *NewRoot = InsInstrs[NewRootIdx]; + + // Get depth and latency of NewRoot and Root. + unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned RootDepth = BlockTrace.getInstrCycles(Root).Depth; + + DEBUG(dbgs() << "DEPENDENCE DATA FOR " << Root << "\n"; + dbgs() << " NewRootDepth: " << NewRootDepth << "\n"; + dbgs() << " RootDepth: " << RootDepth << "\n"); + + // For a transform such as reassociation, the cost equation is + // conservatively calculated so that we must improve the depth (data + // dependency cycles) in the critical path to proceed with the transform. + // Being conservative also protects against inaccuracies in the underlying + // machine trace metrics and CPU models. + if (getCombinerObjective(Pattern) == CombinerObjective::MustReduceDepth) + return NewRootDepth < RootDepth; + + // A more flexible cost calculation for the critical path includes the slack + // of the original code sequence. This may allow the transform to proceed + // even if the instruction depths (data dependency cycles) become worse. 
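As an editorial aside before the computation that follows: a small worked example of this slack-based test. All cycle counts below are hypothetical and only illustrate the comparison the code derives from the machine model and trace metrics.

    // Hypothetical numbers, for illustration only.
    unsigned NewRootDepth = 6, NewRootLatency = 4;            // new sequence
    unsigned RootDepth = 5, RootLatency = 3, RootSlack = 2;   // old sequence
    // Same comparison as below: accept when the new depth + latency does not
    // exceed the old depth + latency + slack.
    bool Acceptable = (NewRootDepth + NewRootLatency) <=       // 10
                      (RootDepth + RootLatency + RootSlack);   // 10 -> true,
    // so the transform may proceed even though the depth grew by one cycle.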
+ unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace); + unsigned RootLatency = TSchedModel.computeInstrLatency(Root); + unsigned RootSlack = BlockTrace.getInstrSlack(Root); + + DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n"; + dbgs() << " RootLatency: " << RootLatency << "\n"; + dbgs() << " RootSlack: " << RootSlack << "\n"; + dbgs() << " NewRootDepth + NewRootLatency = " + << NewRootDepth + NewRootLatency << "\n"; + dbgs() << " RootDepth + RootLatency + RootSlack = " + << RootDepth + RootLatency + RootSlack << "\n";); + + unsigned NewCycleCount = NewRootDepth + NewRootLatency; + unsigned OldCycleCount = RootDepth + RootLatency + RootSlack; + + return NewCycleCount <= OldCycleCount; +} + +/// helper routine to convert instructions into SC +void MachineCombiner::instr2instrSC( + SmallVectorImpl<MachineInstr *> &Instrs, + SmallVectorImpl<const MCSchedClassDesc *> &InstrsSC) { + for (auto *InstrPtr : Instrs) { + unsigned Opc = InstrPtr->getOpcode(); + unsigned Idx = TII->get(Opc).getSchedClass(); + const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(Idx); + InstrsSC.push_back(SC); + } +} + +/// True when the new instructions do not increase resource length +bool MachineCombiner::preservesResourceLen( + MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs) { + if (!TSchedModel.hasInstrSchedModel()) + return true; + + // Compute current resource length + + //ArrayRef<const MachineBasicBlock *> MBBarr(MBB); + SmallVector <const MachineBasicBlock *, 1> MBBarr; + MBBarr.push_back(MBB); + unsigned ResLenBeforeCombine = BlockTrace.getResourceLength(MBBarr); + + // Deal with SC rather than Instructions. + SmallVector<const MCSchedClassDesc *, 16> InsInstrsSC; + SmallVector<const MCSchedClassDesc *, 16> DelInstrsSC; + + instr2instrSC(InsInstrs, InsInstrsSC); + instr2instrSC(DelInstrs, DelInstrsSC); + + ArrayRef<const MCSchedClassDesc *> MSCInsArr = makeArrayRef(InsInstrsSC); + ArrayRef<const MCSchedClassDesc *> MSCDelArr = makeArrayRef(DelInstrsSC); + + // Compute new resource length. + unsigned ResLenAfterCombine = + BlockTrace.getResourceLength(MBBarr, MSCInsArr, MSCDelArr); + + DEBUG(dbgs() << "RESOURCE DATA: \n"; + dbgs() << " resource len before: " << ResLenBeforeCombine + << " after: " << ResLenAfterCombine << "\n";); + + return ResLenAfterCombine <= ResLenBeforeCombine; +} + +/// \returns true when new instruction sequence should be generated +/// independent if it lengthens critical path or not +bool MachineCombiner::doSubstitute(unsigned NewSize, unsigned OldSize) { + if (OptSize && (NewSize < OldSize)) + return true; + if (!TSchedModel.hasInstrSchedModelOrItineraries()) + return true; + return false; +} + +/// Substitute a slow code sequence with a faster one by +/// evaluating instruction combining pattern. +/// The prototype of such a pattern is MUl + ADD -> MADD. Performs instruction +/// combining based on machine trace metrics. Only combine a sequence of +/// instructions when this neither lengthens the critical path nor increases +/// resource pressure. When optimizing for codesize always combine when the new +/// sequence is shorter. 
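For orientation, a condensed sketch of the per-pattern acceptance test applied in the routine below. The names are the members declared above; this merely restates the check and adds no logic of its own.

    // Accept the alternative sequence when either
    //  (a) we optimize for size and it is shorter, or
    //  (b) it neither lengthens the critical path nor increases the
    //      resource length.
    bool Accept =
        doSubstitute(InsInstrs.size(), DelInstrs.size()) ||
        (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
                                 InstrIdxForVirtReg, P) &&
         preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs));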
+bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) { + bool Changed = false; + DEBUG(dbgs() << "Combining MBB " << MBB->getName() << "\n"); + + auto BlockIter = MBB->begin(); + + while (BlockIter != MBB->end()) { + auto &MI = *BlockIter++; + + DEBUG(dbgs() << "INSTR "; MI.dump(); dbgs() << "\n";); + SmallVector<MachineCombinerPattern, 16> Patterns; + // The motivating example is: + // + // MUL Other MUL_op1 MUL_op2 Other + // \ / \ | / + // ADD/SUB => MADD/MSUB + // (=Root) (=NewRoot) + + // The DAGCombine code always replaced MUL + ADD/SUB by MADD. While this is + // usually beneficial for code size it unfortunately can hurt performance + // when the ADD is on the critical path, but the MUL is not. With the + // substitution the MUL becomes part of the critical path (in form of the + // MADD) and can lengthen it on architectures where the MADD latency is + // longer than the ADD latency. + // + // For each instruction we check if it can be the root of a combiner + // pattern. Then for each pattern the new code sequence in form of MI is + // generated and evaluated. When the efficiency criteria (don't lengthen + // critical path, don't use more resources) is met the new sequence gets + // hooked up into the basic block before the old sequence is removed. + // + // The algorithm does not try to evaluate all patterns and pick the best. + // This is only an artificial restriction though. In practice there is + // mostly one pattern, and getMachineCombinerPatterns() can order patterns + // based on an internal cost heuristic. + + if (!TII->getMachineCombinerPatterns(MI, Patterns)) + continue; + + for (auto P : Patterns) { + SmallVector<MachineInstr *, 16> InsInstrs; + SmallVector<MachineInstr *, 16> DelInstrs; + DenseMap<unsigned, unsigned> InstrIdxForVirtReg; + if (!MinInstr) + MinInstr = Traces->getEnsemble(MachineTraceMetrics::TS_MinInstrCount); + MachineTraceMetrics::Trace BlockTrace = MinInstr->getTrace(MBB); + Traces->verifyAnalysis(); + TII->genAlternativeCodeSequence(MI, P, InsInstrs, DelInstrs, + InstrIdxForVirtReg); + unsigned NewInstCount = InsInstrs.size(); + unsigned OldInstCount = DelInstrs.size(); + // Found pattern, but did not generate alternative sequence. + // This can happen e.g. when an immediate could not be materialized + // in a single instruction. + if (!NewInstCount) + continue; + + // Substitute when we optimize for codesize and the new sequence has + // fewer instructions OR + // the new sequence neither lengthens the critical path nor increases + // resource pressure. + if (doSubstitute(NewInstCount, OldInstCount) || + (improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs, + InstrIdxForVirtReg, P) && + preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) { + for (auto *InstrPtr : InsInstrs) + MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr); + for (auto *InstrPtr : DelInstrs) + InstrPtr->eraseFromParentAndMarkDBGValuesForRemoval(); + + Changed = true; + ++NumInstCombined; + + Traces->invalidate(MBB); + Traces->verifyAnalysis(); + // Eagerly stop after the first pattern fires. + break; + } else { + // Cleanup instructions of the alternative code sequence. There is no + // use for them. 
+ MachineFunction *MF = MBB->getParent(); + for (auto *InstrPtr : InsInstrs) + MF->DeleteMachineInstr(InstrPtr); + } + InstrIdxForVirtReg.clear(); + } + } + + return Changed; +} + +bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) { + const TargetSubtargetInfo &STI = MF.getSubtarget(); + TII = STI.getInstrInfo(); + TRI = STI.getRegisterInfo(); + SchedModel = STI.getSchedModel(); + TSchedModel.init(SchedModel, &STI, TII); + MRI = &MF.getRegInfo(); + Traces = &getAnalysis<MachineTraceMetrics>(); + MinInstr = nullptr; + OptSize = MF.getFunction()->optForSize(); + + DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); + if (!TII->useMachineCombiner()) { + DEBUG(dbgs() << " Skipping pass: Target does not support machine combiner\n"); + return false; + } + + bool Changed = false; + + // Try to combine instructions. + for (auto &MBB : MF) + Changed |= combineInstructions(&MBB); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp new file mode 100644 index 0000000..a686341 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -0,0 +1,346 @@ +//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This is an extremely simple MachineInstr-level copy propagation pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "codegen-cp" + +STATISTIC(NumDeletes, "Number of dead copies deleted"); + +namespace { + class MachineCopyPropagation : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + + public: + static char ID; // Pass identification, replacement for typeid + MachineCopyPropagation() : MachineFunctionPass(ID) { + initializeMachineCopyPropagationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + typedef SmallVector<unsigned, 4> DestList; + typedef DenseMap<unsigned, DestList> SourceMap; + + void SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap); + bool CopyPropagateBlock(MachineBasicBlock &MBB); + }; +} +char MachineCopyPropagation::ID = 0; +char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID; + +INITIALIZE_PASS(MachineCopyPropagation, "machine-cp", + "Machine Copy Propagation Pass", false, false) + +void +MachineCopyPropagation::SourceNoLongerAvailable(unsigned Reg, + SourceMap &SrcMap, + DenseMap<unsigned, MachineInstr*> &AvailCopyMap) { + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + SourceMap::iterator SI = 
SrcMap.find(*AI); + if (SI != SrcMap.end()) { + const DestList& Defs = SI->second; + for (DestList::const_iterator I = Defs.begin(), E = Defs.end(); + I != E; ++I) { + unsigned MappedDef = *I; + // Source of copy is no longer available for propagation. + AvailCopyMap.erase(MappedDef); + for (MCSubRegIterator SR(MappedDef, TRI); SR.isValid(); ++SR) + AvailCopyMap.erase(*SR); + } + } + } +} + +static bool NoInterveningSideEffect(const MachineInstr *CopyMI, + const MachineInstr *MI) { + const MachineBasicBlock *MBB = CopyMI->getParent(); + if (MI->getParent() != MBB) + return false; + MachineBasicBlock::const_iterator I = CopyMI; + MachineBasicBlock::const_iterator E = MBB->end(); + MachineBasicBlock::const_iterator E2 = MI; + + ++I; + while (I != E && I != E2) { + if (I->hasUnmodeledSideEffects() || I->isCall() || + I->isTerminator()) + return false; + ++I; + } + return true; +} + +/// isNopCopy - Return true if the specified copy is really a nop. That is +/// if the source of the copy is the same of the definition of the copy that +/// supplied the source. If the source of the copy is a sub-register than it +/// must check the sub-indices match. e.g. +/// ecx = mov eax +/// al = mov cl +/// But not +/// ecx = mov eax +/// al = mov ch +static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, + const TargetRegisterInfo *TRI) { + unsigned SrcSrc = CopyMI->getOperand(1).getReg(); + if (Def == SrcSrc) + return true; + if (TRI->isSubRegister(SrcSrc, Def)) { + unsigned SrcDef = CopyMI->getOperand(0).getReg(); + unsigned SubIdx = TRI->getSubRegIndex(SrcSrc, Def); + if (!SubIdx) + return false; + return SubIdx == TRI->getSubRegIndex(SrcDef, Src); + } + + return false; +} + +bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { + SmallSetVector<MachineInstr*, 8> MaybeDeadCopies; // Candidates for deletion + DenseMap<unsigned, MachineInstr*> AvailCopyMap; // Def -> available copies map + DenseMap<unsigned, MachineInstr*> CopyMap; // Def -> copies map + SourceMap SrcMap; // Src -> Def map + + DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n"); + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + MachineInstr *MI = &*I; + ++I; + + if (MI->isCopy()) { + unsigned Def = MI->getOperand(0).getReg(); + unsigned Src = MI->getOperand(1).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Def) || + TargetRegisterInfo::isVirtualRegister(Src)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + DenseMap<unsigned, MachineInstr*>::iterator CI = AvailCopyMap.find(Src); + if (CI != AvailCopyMap.end()) { + MachineInstr *CopyMI = CI->second; + if (!MRI->isReserved(Def) && + (!MRI->isReserved(Src) || NoInterveningSideEffect(CopyMI, MI)) && + isNopCopy(CopyMI, Def, Src, TRI)) { + // The two copies cancel out and the source of the first copy + // hasn't been overridden, eliminate the second one. e.g. + // %ECX<def> = COPY %EAX<kill> + // ... nothing clobbered EAX. + // %EAX<def> = COPY %ECX + // => + // %ECX<def> = COPY %EAX + // + // Also avoid eliminating a copy from reserved registers unless the + // definition is proven not clobbered. e.g. + // %RSP<def> = COPY %RAX + // CALL + // %RAX<def> = COPY %RSP + + DEBUG(dbgs() << "MCP: copy is a NOP, removing: "; MI->dump()); + + // Clear any kills of Def between CopyMI and MI. This extends the + // live range. 
+ for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) + I->clearRegisterKills(Def, TRI); + + MI->eraseFromParent(); + Changed = true; + ++NumDeletes; + continue; + } + } + + // If Src is defined by a previous copy, it cannot be eliminated. + for (MCRegAliasIterator AI(Src, TRI, true); AI.isValid(); ++AI) { + CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is no longer dead: "; CI->second->dump()); + MaybeDeadCopies.remove(CI->second); + } + } + + DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump()); + + // Copy is now a candidate for deletion. + MaybeDeadCopies.insert(MI); + + // If 'Src' is previously source of another copy, then this earlier copy's + // source is no longer available. e.g. + // %xmm9<def> = copy %xmm2 + // ... + // %xmm2<def> = copy %xmm0 + // ... + // %xmm2<def> = copy %xmm9 + SourceNoLongerAvailable(Def, SrcMap, AvailCopyMap); + + // Remember Def is defined by the copy. + // ... Make sure to clear the def maps of aliases first. + for (MCRegAliasIterator AI(Def, TRI, false); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); + } + for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); + ++SR) { + CopyMap[*SR] = MI; + AvailCopyMap[*SR] = MI; + } + + // Remember source that's copied to Def. Once it's clobbered, then + // it's no longer available for copy propagation. + if (std::find(SrcMap[Src].begin(), SrcMap[Src].end(), Def) == + SrcMap[Src].end()) { + SrcMap[Src].push_back(Def); + } + + continue; + } + + // Not a copy. + SmallVector<unsigned, 2> Defs; + int RegMaskOpNum = -1; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + RegMaskOpNum = i; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (TargetRegisterInfo::isVirtualRegister(Reg)) + report_fatal_error("MachineCopyPropagation should be run after" + " register allocation!"); + + if (MO.isDef()) { + Defs.push_back(Reg); + continue; + } + + // If 'Reg' is defined by a copy, the copy is no longer a candidate + // for elimination. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + DenseMap<unsigned, MachineInstr*>::iterator CI = CopyMap.find(*AI); + if (CI != CopyMap.end()) { + DEBUG(dbgs() << "MCP: Copy is used - not dead: "; CI->second->dump()); + MaybeDeadCopies.remove(CI->second); + } + } + // Treat undef use like defs for copy propagation but not for + // dead copy. We would need to do a liveness check to be sure the copy + // is dead for undef uses. + // The backends are allowed to do whatever they want with undef value + // and we cannot be sure this register will not be rewritten to break + // some false dependencies for the hardware for instance. + if (MO.isUndef()) + Defs.push_back(Reg); + } + + // The instruction has a register mask operand which means that it clobbers + // a large set of registers. It is possible to use the register mask to + // prune the available copies, but treat it like a basic block boundary for + // now. + if (RegMaskOpNum >= 0) { + // Erase any MaybeDeadCopies whose destination register is clobbered. 
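A brief aside on the case handled next, with hypothetical x86 registers used purely for illustration:

    //   %EAX = COPY %EBX     ; still in MaybeDeadCopies, no use seen yet
    //   CALL foo             ; carries a regmask that clobbers %EAX
    // The destination of the candidate copy is clobbered by the call's
    // regmask before any read, so the copy is provably dead and is erased
    // below; reserved registers are skipped.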
+ const MachineOperand &MaskMO = MI->getOperand(RegMaskOpNum); + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + unsigned Reg = (*DI)->getOperand(0).getReg(); + if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) + continue; + DEBUG(dbgs() << "MCP: Removing copy due to regmask clobbering: "; + (*DI)->dump()); + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + + // Clear all data structures as if we were beginning a new basic block. + MaybeDeadCopies.clear(); + AvailCopyMap.clear(); + CopyMap.clear(); + SrcMap.clear(); + continue; + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + + // No longer defined by a copy. + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + CopyMap.erase(*AI); + AvailCopyMap.erase(*AI); + } + + // If 'Reg' is previously source of a copy, it is no longer available for + // copy propagation. + SourceNoLongerAvailable(Reg, SrcMap, AvailCopyMap); + } + } + + // If MBB doesn't have successors, delete the copies whose defs are not used. + // If MBB does have successors, then conservative assume the defs are live-out + // since we don't want to trust live-in lists. + if (MBB.succ_empty()) { + for (SmallSetVector<MachineInstr*, 8>::iterator + DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); + DI != DE; ++DI) { + if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { + (*DI)->eraseFromParent(); + Changed = true; + ++NumDeletes; + } + } + } + + return Changed; +} + +bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + bool Changed = false; + + TRI = MF.getSubtarget().getRegisterInfo(); + TII = MF.getSubtarget().getInstrInfo(); + MRI = &MF.getRegInfo(); + + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= CopyPropagateBlock(*I); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp new file mode 100644 index 0000000..acb7c48 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineDominanceFrontier.cpp @@ -0,0 +1,54 @@ +//===- MachineDominanceFrontier.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominanceFrontier.h" +#include "llvm/Analysis/DominanceFrontierImpl.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" + + +using namespace llvm; + +namespace llvm { +template class DominanceFrontierBase<MachineBasicBlock>; +template class ForwardDominanceFrontierBase<MachineBasicBlock>; +} + + +char MachineDominanceFrontier::ID = 0; + +INITIALIZE_PASS_BEGIN(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineDominanceFrontier, "machine-domfrontier", + "Machine Dominance Frontier Construction", true, true) + +MachineDominanceFrontier::MachineDominanceFrontier() + : MachineFunctionPass(ID), + Base() { + initializeMachineDominanceFrontierPass(*PassRegistry::getPassRegistry()); +} + +char &llvm::MachineDominanceFrontierID = MachineDominanceFrontier::ID; + +bool MachineDominanceFrontier::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + Base.analyze(getAnalysis<MachineDominatorTree>().getBase()); + return false; +} + +void MachineDominanceFrontier::releaseMemory() { + Base.releaseMemory(); +} + +void MachineDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/CodeGen/MachineDominators.cpp b/contrib/llvm/lib/CodeGen/MachineDominators.cpp new file mode 100644 index 0000000..3f04bb0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineDominators.cpp @@ -0,0 +1,127 @@ +//===- MachineDominators.cpp - Machine Dominator Calculation --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// forward dominators on machine functions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallBitVector.h" + +using namespace llvm; + +namespace llvm { +template class DomTreeNodeBase<MachineBasicBlock>; +template class DominatorTreeBase<MachineBasicBlock>; +} + +char MachineDominatorTree::ID = 0; + +INITIALIZE_PASS(MachineDominatorTree, "machinedomtree", + "MachineDominator Tree Construction", true, true) + +char &llvm::MachineDominatorsID = MachineDominatorTree::ID; + +void MachineDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) { + CriticalEdgesToSplit.clear(); + NewBBs.clear(); + DT->recalculate(F); + + return false; +} + +MachineDominatorTree::MachineDominatorTree() + : MachineFunctionPass(ID) { + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase<MachineBasicBlock>(false); +} + +MachineDominatorTree::~MachineDominatorTree() { + delete DT; +} + +void MachineDominatorTree::releaseMemory() { + DT->releaseMemory(); +} + +void MachineDominatorTree::print(raw_ostream &OS, const Module*) const { + DT->print(OS); +} + +void MachineDominatorTree::applySplitCriticalEdges() const { + // Bail out early if there is nothing to do. + if (CriticalEdgesToSplit.empty()) + return; + + // For each element in CriticalEdgesToSplit, remember whether or not element + // is the new immediate domminator of its successor. The mapping is done by + // index, i.e., the information for the ith element of CriticalEdgesToSplit is + // the ith element of IsNewIDom. + SmallBitVector IsNewIDom(CriticalEdgesToSplit.size(), true); + size_t Idx = 0; + + // Collect all the dominance properties info, before invalidating + // the underlying DT. + for (CriticalEdge &Edge : CriticalEdgesToSplit) { + // Update dominator information. + MachineBasicBlock *Succ = Edge.ToBB; + MachineDomTreeNode *SuccDTNode = DT->getNode(Succ); + + for (MachineBasicBlock *PredBB : Succ->predecessors()) { + if (PredBB == Edge.NewBB) + continue; + // If we are in this situation: + // FromBB1 FromBB2 + // + + + // + + + + + // + + + + + // ... Split1 Split2 ... + // + + + // + + + // + + // Succ + // Instead of checking the domiance property with Split2, we check it with + // FromBB2 since Split2 is still unknown of the underlying DT structure. + if (NewBBs.count(PredBB)) { + assert(PredBB->pred_size() == 1 && "A basic block resulting from a " + "critical edge split has more " + "than one predecessor!"); + PredBB = *PredBB->pred_begin(); + } + if (!DT->dominates(SuccDTNode, DT->getNode(PredBB))) { + IsNewIDom[Idx] = false; + break; + } + } + ++Idx; + } + + // Now, update DT with the collected dominance properties info. + Idx = 0; + for (CriticalEdge &Edge : CriticalEdgesToSplit) { + // We know FromBB dominates NewBB. + MachineDomTreeNode *NewDTNode = DT->addNewBlock(Edge.NewBB, Edge.FromBB); + + // If all the other predecessors of "Succ" are dominated by "Succ" itself + // then the new block is the new immediate dominator of "Succ". Otherwise, + // the new block doesn't dominate anything. 
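To make the rule above concrete, a small invented example (block names are illustrative only):

    // Edge A->Succ was split into A->NewBB->Succ.
    //  - If every other predecessor of Succ is dominated by Succ itself
    //    (for instance a loop back edge), or NewBB is Succ's only
    //    predecessor, then NewBB becomes Succ's new immediate dominator.
    //  - If Succ is also reached from some block B that Succ does not
    //    dominate, Succ's immediate dominator is left untouched; NewBB is
    //    still added to the tree, immediately dominated by A.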
+ if (IsNewIDom[Idx]) + DT->changeImmediateDominator(DT->getNode(Edge.ToBB), NewDTNode); + ++Idx; + } + NewBBs.clear(); + CriticalEdgesToSplit.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp new file mode 100644 index 0000000..ca4bb1c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -0,0 +1,970 @@ +//===-- MachineFunction.cpp -----------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Collect native machine code information for a function. This allows +// target-specific information about the generated code to be stored with each +// function. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionInitializer.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "codegen" + +static cl::opt<unsigned> + AlignAllFunctions("align-all-functions", + cl::desc("Force the alignment of all functions."), + cl::init(0), cl::Hidden); + +void MachineFunctionInitializer::anchor() {} + +//===----------------------------------------------------------------------===// +// MachineFunction implementation +//===----------------------------------------------------------------------===// + +// Out-of-line virtual method. 
+MachineFunctionInfo::~MachineFunctionInfo() {} + +void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) { + MBB->getParent()->DeleteMachineBasicBlock(MBB); +} + +MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, + unsigned FunctionNum, MachineModuleInfo &mmi) + : Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()), + MMI(mmi) { + if (STI->getRegisterInfo()) + RegInfo = new (Allocator) MachineRegisterInfo(this); + else + RegInfo = nullptr; + + MFInfo = nullptr; + FrameInfo = new (Allocator) + MachineFrameInfo(STI->getFrameLowering()->getStackAlignment(), + STI->getFrameLowering()->isStackRealignable(), + !F->hasFnAttribute("no-realign-stack")); + + if (Fn->hasFnAttribute(Attribute::StackAlignment)) + FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment()); + + ConstantPool = new (Allocator) MachineConstantPool(getDataLayout()); + Alignment = STI->getTargetLowering()->getMinFunctionAlignment(); + + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. + // FIXME: Use Function::optForSize(). + if (!Fn->hasFnAttribute(Attribute::OptimizeForSize)) + Alignment = std::max(Alignment, + STI->getTargetLowering()->getPrefFunctionAlignment()); + + if (AlignAllFunctions) + Alignment = AlignAllFunctions; + + FunctionNumber = FunctionNum; + JumpTableInfo = nullptr; + + if (isFuncletEHPersonality(classifyEHPersonality( + F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) { + WinEHInfo = new (Allocator) WinEHFuncInfo(); + } + + assert(TM.isCompatibleDataLayout(getDataLayout()) && + "Can't create a MachineFunction using a Module with a " + "Target-incompatible DataLayout attached\n"); + + PSVManager = llvm::make_unique<PseudoSourceValueManager>(); +} + +MachineFunction::~MachineFunction() { + // Don't call destructors on MachineInstr and MachineOperand. All of their + // memory comes from the BumpPtrAllocator which is about to be purged. + // + // Do call MachineBasicBlock destructors, it contains std::vectors. + for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I)) + I->Insts.clearAndLeakNodesUnsafely(); + + InstructionRecycler.clear(Allocator); + OperandRecycler.clear(Allocator); + BasicBlockRecycler.clear(Allocator); + if (RegInfo) { + RegInfo->~MachineRegisterInfo(); + Allocator.Deallocate(RegInfo); + } + if (MFInfo) { + MFInfo->~MachineFunctionInfo(); + Allocator.Deallocate(MFInfo); + } + + FrameInfo->~MachineFrameInfo(); + Allocator.Deallocate(FrameInfo); + + ConstantPool->~MachineConstantPool(); + Allocator.Deallocate(ConstantPool); + + if (JumpTableInfo) { + JumpTableInfo->~MachineJumpTableInfo(); + Allocator.Deallocate(JumpTableInfo); + } + + if (WinEHInfo) { + WinEHInfo->~WinEHFuncInfo(); + Allocator.Deallocate(WinEHInfo); + } +} + +const DataLayout &MachineFunction::getDataLayout() const { + return Fn->getParent()->getDataLayout(); +} + +/// Get the JumpTableInfo for this function. +/// If it does not already exist, allocate one. +MachineJumpTableInfo *MachineFunction:: +getOrCreateJumpTableInfo(unsigned EntryKind) { + if (JumpTableInfo) return JumpTableInfo; + + JumpTableInfo = new (Allocator) + MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); + return JumpTableInfo; +} + +/// Should we be emitting segmented stack stuff for the function +bool MachineFunction::shouldSplitStack() { + return getFunction()->hasFnAttribute("split-stack"); +} + +/// This discards all of the MachineBasicBlock numbers and recomputes them. 
+/// This guarantees that the MBB numbers are sequential, dense, and match the +/// ordering of the blocks within the function. If a specific MachineBasicBlock +/// is specified, only that block and those after it are renumbered. +void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) { + if (empty()) { MBBNumbering.clear(); return; } + MachineFunction::iterator MBBI, E = end(); + if (MBB == nullptr) + MBBI = begin(); + else + MBBI = MBB->getIterator(); + + // Figure out the block number this should have. + unsigned BlockNo = 0; + if (MBBI != begin()) + BlockNo = std::prev(MBBI)->getNumber() + 1; + + for (; MBBI != E; ++MBBI, ++BlockNo) { + if (MBBI->getNumber() != (int)BlockNo) { + // Remove use of the old number. + if (MBBI->getNumber() != -1) { + assert(MBBNumbering[MBBI->getNumber()] == &*MBBI && + "MBB number mismatch!"); + MBBNumbering[MBBI->getNumber()] = nullptr; + } + + // If BlockNo is already taken, set that block's number to -1. + if (MBBNumbering[BlockNo]) + MBBNumbering[BlockNo]->setNumber(-1); + + MBBNumbering[BlockNo] = &*MBBI; + MBBI->setNumber(BlockNo); + } + } + + // Okay, all the blocks are renumbered. If we have compactified the block + // numbering, shrink MBBNumbering now. + assert(BlockNo <= MBBNumbering.size() && "Mismatch!"); + MBBNumbering.resize(BlockNo); +} + +/// Allocate a new MachineInstr. Use this instead of `new MachineInstr'. +MachineInstr * +MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, + DebugLoc DL, bool NoImp) { + return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) + MachineInstr(*this, MCID, DL, NoImp); +} + +/// Create a new MachineInstr which is a copy of the 'Orig' instruction, +/// identical in all ways except the instruction has no parent, prev, or next. +MachineInstr * +MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { + return new (InstructionRecycler.Allocate<MachineInstr>(Allocator)) + MachineInstr(*this, *Orig); +} + +/// Delete the given MachineInstr. +/// +/// This function also serves as the MachineInstr destructor - the real +/// ~MachineInstr() destructor must be empty. +void +MachineFunction::DeleteMachineInstr(MachineInstr *MI) { + // Strip it for parts. The operand array and the MI object itself are + // independently recyclable. + if (MI->Operands) + deallocateOperandArray(MI->CapOperands, MI->Operands); + // Don't call ~MachineInstr() which must be trivial anyway because + // ~MachineFunction drops whole lists of MachineInstrs wihout calling their + // destructors. + InstructionRecycler.Deallocate(Allocator, MI); +} + +/// Allocate a new MachineBasicBlock. Use this instead of +/// `new MachineBasicBlock'. +MachineBasicBlock * +MachineFunction::CreateMachineBasicBlock(const BasicBlock *bb) { + return new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator)) + MachineBasicBlock(*this, bb); +} + +/// Delete the given MachineBasicBlock. 
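As with instructions above, basic blocks are recycled rather than freed through operator delete. A schematic of the pattern, simplified from the allocation and deletion code in this file:

    // Allocate: placement-new out of a recycler backed by the function's
    // bump-pointer allocator (see CreateMachineBasicBlock above).
    //   new (BasicBlockRecycler.Allocate<MachineBasicBlock>(Allocator))
    //       MachineBasicBlock(*this, bb);
    // Delete: run the destructor explicitly, then return the storage to the
    // recycler instead of freeing it (see DeleteMachineBasicBlock below).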
+void +MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) { + assert(MBB->getParent() == this && "MBB parent mismatch!"); + MBB->~MachineBasicBlock(); + BasicBlockRecycler.Deallocate(Allocator, MBB); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f, + uint64_t s, unsigned base_alignment, + const AAMDNodes &AAInfo, + const MDNode *Ranges) { + return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment, + AAInfo, Ranges); +} + +MachineMemOperand * +MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, + int64_t Offset, uint64_t Size) { + if (MMO->getValue()) + return new (Allocator) + MachineMemOperand(MachinePointerInfo(MMO->getValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment()); + return new (Allocator) + MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(), + MMO->getOffset()+Offset), + MMO->getFlags(), Size, + MMO->getBaseAlignment()); +} + +MachineInstr::mmo_iterator +MachineFunction::allocateMemRefsArray(unsigned long Num) { + return Allocator.Allocate<MachineMemOperand *>(Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isLoad()) + ++Num; + + // Allocate a new array and populate it with the load information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isLoad()) { + if (!(*I)->isStore()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the store flag. + MachineMemOperand *JustLoad = + getMachineMemOperand((*I)->getPointerInfo(), + (*I)->getFlags() & ~MachineMemOperand::MOStore, + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getAAInfo()); + Result[Index] = JustLoad; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +std::pair<MachineInstr::mmo_iterator, MachineInstr::mmo_iterator> +MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin, + MachineInstr::mmo_iterator End) { + // Count the number of load mem refs. + unsigned Num = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) + if ((*I)->isStore()) + ++Num; + + // Allocate a new array and populate it with the store information. + MachineInstr::mmo_iterator Result = allocateMemRefsArray(Num); + unsigned Index = 0; + for (MachineInstr::mmo_iterator I = Begin; I != End; ++I) { + if ((*I)->isStore()) { + if (!(*I)->isLoad()) + // Reuse the MMO. + Result[Index] = *I; + else { + // Clone the MMO and unset the load flag. 
+ MachineMemOperand *JustStore = + getMachineMemOperand((*I)->getPointerInfo(), + (*I)->getFlags() & ~MachineMemOperand::MOLoad, + (*I)->getSize(), (*I)->getBaseAlignment(), + (*I)->getAAInfo()); + Result[Index] = JustStore; + } + ++Index; + } + } + return std::make_pair(Result, Result + Num); +} + +const char *MachineFunction::createExternalSymbolName(StringRef Name) { + char *Dest = Allocator.Allocate<char>(Name.size() + 1); + std::copy(Name.begin(), Name.end(), Dest); + Dest[Name.size()] = 0; + return Dest; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineFunction::dump() const { + print(dbgs()); +} +#endif + +StringRef MachineFunction::getName() const { + assert(getFunction() && "No function!"); + return getFunction()->getName(); +} + +void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { + OS << "# Machine code for function " << getName() << ": "; + if (RegInfo) { + OS << (RegInfo->isSSA() ? "SSA" : "Post SSA"); + if (!RegInfo->tracksLiveness()) + OS << ", not tracking liveness"; + } + OS << '\n'; + + // Print Frame Information + FrameInfo->print(*this, OS); + + // Print JumpTable Information + if (JumpTableInfo) + JumpTableInfo->print(OS); + + // Print Constant Pool + ConstantPool->print(OS); + + const TargetRegisterInfo *TRI = getSubtarget().getRegisterInfo(); + + if (RegInfo && !RegInfo->livein_empty()) { + OS << "Function Live Ins: "; + for (MachineRegisterInfo::livein_iterator + I = RegInfo->livein_begin(), E = RegInfo->livein_end(); I != E; ++I) { + OS << PrintReg(I->first, TRI); + if (I->second) + OS << " in " << PrintReg(I->second, TRI); + if (std::next(I) != E) + OS << ", "; + } + OS << '\n'; + } + + ModuleSlotTracker MST(getFunction()->getParent()); + MST.incorporateFunction(*getFunction()); + for (const auto &BB : *this) { + OS << '\n'; + BB.print(OS, MST, Indexes); + } + + OS << "\n# End machine code for function " << getName() << ".\n\n"; +} + +namespace llvm { + template<> + struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const MachineFunction *F) { + return ("CFG for '" + F->getName() + "' function").str(); + } + + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + std::string OutStr; + { + raw_string_ostream OSS(OutStr); + + if (isSimple()) { + OSS << "BB#" << Node->getNumber(); + if (const BasicBlock *BB = Node->getBasicBlock()) + OSS << ": " << BB->getName(); + } else + Node->print(OSS); + } + + if (OutStr[0] == '\n') OutStr.erase(OutStr.begin()); + + // Process string output to make it nicer... + for (unsigned i = 0; i != OutStr.length(); ++i) + if (OutStr[i] == '\n') { // Left justify + OutStr[i] = '\\'; + OutStr.insert(OutStr.begin()+i+1, 'l'); + } + return OutStr; + } + }; +} + +void MachineFunction::viewCFG() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getName()); +#else + errs() << "MachineFunction::viewCFG is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +void MachineFunction::viewCFGOnly() const +{ +#ifndef NDEBUG + ViewGraph(this, "mf" + getName(), true); +#else + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Add the specified physical register as a live-in value and +/// create a corresponding virtual register for it. 
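A hedged usage sketch before the definition: the register and register-class names below are placeholders invented for the example; only addLiveIn itself comes from this file.

    // In a target's argument-lowering code (hypothetical names):
    unsigned VReg = MF.addLiveIn(MyTarget::R0, &MyTarget::GPR32RegClass);
    // R0 is now recorded as a live-in of the function and VReg is the
    // virtual register associated with it; repeated calls for the same
    // physical register return the same virtual register.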
+unsigned MachineFunction::addLiveIn(unsigned PReg, + const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = getRegInfo(); + unsigned VReg = MRI.getLiveInVirtReg(PReg); + if (VReg) { + const TargetRegisterClass *VRegRC = MRI.getRegClass(VReg); + (void)VRegRC; + // A physical register can be added several times. + // Between two calls, the register class of the related virtual register + // may have been constrained to match some operation constraints. + // In that case, check that the current register class includes the + // physical register and is a sub class of the specified RC. + assert((VRegRC == RC || (VRegRC->contains(PReg) && + RC->hasSubClassEq(VRegRC))) && + "Register class mismatch!"); + return VReg; + } + VReg = MRI.createVirtualRegister(RC); + MRI.addLiveIn(PReg, VReg); + return VReg; +} + +/// Return the MCSymbol for the specified non-empty jump table. +/// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a +/// normal 'L' label is returned. +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, + bool isLinkerPrivate) const { + const DataLayout &DL = getDataLayout(); + assert(JumpTableInfo && "No jump tables"); + assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!"); + + const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix() + : DL.getPrivateGlobalPrefix(); + SmallString<60> Name; + raw_svector_ostream(Name) + << Prefix << "JTI" << getFunctionNumber() << '_' << JTI; + return Ctx.getOrCreateSymbol(Name); +} + +/// Return a function-local symbol to represent the PIC base. +MCSymbol *MachineFunction::getPICBaseSymbol() const { + const DataLayout &DL = getDataLayout(); + return Ctx.getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) + + Twine(getFunctionNumber()) + "$pb"); +} + +//===----------------------------------------------------------------------===// +// MachineFrameInfo implementation +//===----------------------------------------------------------------------===// + +/// Make sure the function is at least Align bytes aligned. +void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!StackRealignable || !RealignOption) + assert(Align <= StackAlignment && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// Clamp the alignment if requested and emit a warning. +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +/// Create a new statically sized stack object, returning a nonnegative +/// identifier to represent it. +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, Alloca, + !isSS)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +/// Create a new statically sized stack object that represents a spill slot, +/// returning a nonnegative identifier to represent it. 
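For orientation, a sketch of how a spill-slot index is typically created and consumed; only the MachineFrameInfo call is taken from this file, the rest is an assumed, simplified usage.

    // Create an 8-byte spill slot with 8-byte alignment; the returned frame
    // index is nonnegative for ordinary stack objects (fixed objects created
    // with CreateFixedObject use negative indices instead).
    int FI = MFI->CreateSpillStackObject(/*Size=*/8, /*Alignment=*/8);
    // A target would later emit a store and reload against frame index FI,
    // e.g. through its spill/restore hooks (not shown here).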
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + CreateStackObject(Size, Alignment, true); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +/// Notify the MachineFrameInfo object that a variable sized object has been +/// created. This must be created whenever a variable sized object is created, +/// whether or not the index returned is actually used. +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, + const AllocaInst *Alloca) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!StackRealignable || !RealignOption, + Alignment, StackAlignment); + Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca, true)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + +/// Create a new object at a fixed location on the stack. +/// All fixed objects should be created before other objects are created for +/// efficiency. By default, fixed objects are immutable. This returns an +/// index with a negative value. +int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, + bool Immutable, bool isAliased) { + assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/ false, + /*Alloca*/ nullptr, isAliased)); + return -++NumFixedObjects; +} + +/// Create a spill slot at a fixed location on the stack. +/// Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset) { + unsigned Align = MinAlign(SPOffset, StackAlignment); + Align = clampStackAlignment(!StackRealignable || !RealignOption, Align, + StackAlignment); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr, + /*isAliased*/ false)); + return -++NumFixedObjects; +} + +BitVector MachineFrameInfo::getPristineRegs(const MachineFunction &MF) const { + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + BitVector BV(TRI->getNumRegs()); + + // Before CSI is calculated, no registers are considered pristine. They can be + // freely used and PEI will make sure they are saved. + if (!isCalleeSavedInfoValid()) + return BV; + + for (const MCPhysReg *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) + BV.set(*CSR); + + // Saved CSRs are not pristine. + for (auto &I : getCalleeSavedInfo()) + for (MCSubRegIterator S(I.getReg(), TRI, true); S.isValid(); ++S) + BV.reset(*S); + + return BV; +} + +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. 
Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} + +void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ + if (Objects.empty()) return; + + const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering(); + int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0); + + OS << "Frame Objects:\n"; + + for (unsigned i = 0, e = Objects.size(); i != e; ++i) { + const StackObject &SO = Objects[i]; + OS << " fi#" << (int)(i-NumFixedObjects) << ": "; + if (SO.Size == ~0ULL) { + OS << "dead\n"; + continue; + } + if (SO.Size == 0) + OS << "variable sized"; + else + OS << "size=" << SO.Size; + OS << ", align=" << SO.Alignment; + + if (i < NumFixedObjects) + OS << ", fixed"; + if (i < NumFixedObjects || SO.SPOffset != -1) { + int64_t Off = SO.SPOffset - ValOffset; + OS << ", at location [SP"; + if (Off > 0) + OS << "+" << Off; + else if (Off < 0) + OS << Off; + OS << "]"; + } + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineFrameInfo::dump(const MachineFunction &MF) const { + print(MF, dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// MachineJumpTableInfo implementation +//===----------------------------------------------------------------------===// + +/// Return the size of each entry in the jump table. +unsigned MachineJumpTableInfo::getEntrySize(const DataLayout &TD) const { + // The size of a jump table entry is 4 bytes unless the entry is just the + // address of a block, in which case it is the pointer size. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerSize(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return 8; + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return 4; + case MachineJumpTableInfo::EK_Inline: + return 0; + } + llvm_unreachable("Unknown jump table encoding!"); +} + +/// Return the alignment of each entry in the jump table. 
+unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const { + // The alignment of a jump table entry is the alignment of int32 unless the + // entry is just the address of a block, in which case it is the pointer + // alignment. + switch (getEntryKind()) { + case MachineJumpTableInfo::EK_BlockAddress: + return TD.getPointerABIAlignment(); + case MachineJumpTableInfo::EK_GPRel64BlockAddress: + return TD.getABIIntegerTypeAlignment(64); + case MachineJumpTableInfo::EK_GPRel32BlockAddress: + case MachineJumpTableInfo::EK_LabelDifference32: + case MachineJumpTableInfo::EK_Custom32: + return TD.getABIIntegerTypeAlignment(32); + case MachineJumpTableInfo::EK_Inline: + return 1; + } + llvm_unreachable("Unknown jump table encoding!"); +} + +/// Create a new jump table entry in the jump table info. +unsigned MachineJumpTableInfo::createJumpTableIndex( + const std::vector<MachineBasicBlock*> &DestBBs) { + assert(!DestBBs.empty() && "Cannot create an empty jump table!"); + JumpTables.push_back(MachineJumpTableEntry(DestBBs)); + return JumpTables.size()-1; +} + +/// If Old is the target of any jump tables, update the jump tables to branch +/// to New instead. +bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Not making a change?"); + bool MadeChange = false; + for (size_t i = 0, e = JumpTables.size(); i != e; ++i) + ReplaceMBBInJumpTable(i, Old, New); + return MadeChange; +} + +/// If Old is a target of the jump tables, update the jump table to branch to +/// New instead. +bool MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx, + MachineBasicBlock *Old, + MachineBasicBlock *New) { + assert(Old != New && "Not making a change?"); + bool MadeChange = false; + MachineJumpTableEntry &JTE = JumpTables[Idx]; + for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j) + if (JTE.MBBs[j] == Old) { + JTE.MBBs[j] = New; + MadeChange = true; + } + return MadeChange; +} + +void MachineJumpTableInfo::print(raw_ostream &OS) const { + if (JumpTables.empty()) return; + + OS << "Jump Tables:\n"; + + for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) { + OS << " jt#" << i << ": "; + for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j) + OS << " BB#" << JumpTables[i].MBBs[j]->getNumber(); + } + + OS << '\n'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineJumpTableInfo::dump() const { print(dbgs()); } +#endif + + +//===----------------------------------------------------------------------===// +// MachineConstantPool implementation +//===----------------------------------------------------------------------===// + +void MachineConstantPoolValue::anchor() { } + +Type *MachineConstantPoolEntry::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool MachineConstantPoolEntry::needsRelocation() const { + if (isMachineConstantPoolEntry()) + return true; + return Val.ConstVal->needsRelocation(); +} + +SectionKind +MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const { + if (needsRelocation()) + return SectionKind::getReadOnlyWithRel(); + switch (DL->getTypeAllocSize(getType())) { + case 4: + return SectionKind::getMergeableConst4(); + case 8: + return SectionKind::getMergeableConst8(); + case 16: + return SectionKind::getMergeableConst16(); + default: + return SectionKind::getReadOnly(); + } +} + +MachineConstantPool::~MachineConstantPool() { + for (unsigned i = 0, e = Constants.size(); i != e; 
++i) + if (Constants[i].isMachineConstantPoolEntry()) + delete Constants[i].Val.MachineCPVal; + for (DenseSet<MachineConstantPoolValue*>::iterator I = + MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); + I != E; ++I) + delete *I; +} + +/// Test whether the given two constants can be allocated the same constant pool +/// entry. +static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, + const DataLayout &DL) { + // Handle the trivial case quickly. + if (A == B) return true; + + // If they have the same type but weren't the same constant, quickly + // reject them. + if (A->getType() == B->getType()) return false; + + // We can't handle structs or arrays. + if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || + isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) + return false; + + // For now, only support constants with the same size. + uint64_t StoreSize = DL.getTypeStoreSize(A->getType()); + if (StoreSize != DL.getTypeStoreSize(B->getType()) || StoreSize > 128) + return false; + + Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); + + // Try constant folding a bitcast of both instructions to an integer. If we + // get two identical ConstantInt's, then we are good to share them. We use + // the constant folding APIs to do this so that we get the benefit of + // DataLayout. + if (isa<PointerType>(A->getType())) + A = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant *>(A), DL); + else if (A->getType() != IntTy) + A = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant *>(A), DL); + if (isa<PointerType>(B->getType())) + B = ConstantFoldInstOperands(Instruction::PtrToInt, IntTy, + const_cast<Constant *>(B), DL); + else if (B->getType() != IntTy) + B = ConstantFoldInstOperands(Instruction::BitCast, IntTy, + const_cast<Constant *>(B), DL); + + return A == B; +} + +/// Create a new entry in the constant pool or return an existing one. +/// User must specify the log2 of the minimum required alignment for the object. +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. + for (unsigned i = 0, e = Constants.size(); i != e; ++i) + if (!Constants[i].isMachineConstantPoolEntry() && + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, DL)) { + if ((unsigned)Constants[i].getAlignment() < Alignment) + Constants[i].Alignment = Alignment; + return i; + } + + Constants.push_back(MachineConstantPoolEntry(C, Alignment)); + return Constants.size()-1; +} + +unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, + unsigned Alignment) { + assert(Alignment && "Alignment must be specified!"); + if (Alignment > PoolAlignment) PoolAlignment = Alignment; + + // Check to see if we already have this constant. + // + // FIXME, this could be made much more efficient for large constant pools. 
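+  // Unlike plain Constants, target-specific constant pool values implement
+  // their own sharing check via getExistingMachineCPValue(). When an existing
+  // entry is reused, V is remembered in MachineCPVsSharingEntries so the
+  // destructor still frees it even though it never owns a pool entry itself.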
+ int Idx = V->getExistingMachineCPValue(this, Alignment); + if (Idx != -1) { + MachineCPVsSharingEntries.insert(V); + return (unsigned)Idx; + } + + Constants.push_back(MachineConstantPoolEntry(V, Alignment)); + return Constants.size()-1; +} + +void MachineConstantPool::print(raw_ostream &OS) const { + if (Constants.empty()) return; + + OS << "Constant Pool:\n"; + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + OS << " cp#" << i << ": "; + if (Constants[i].isMachineConstantPoolEntry()) + Constants[i].Val.MachineCPVal->print(OS); + else + Constants[i].Val.ConstVal->printAsOperand(OS, /*PrintType=*/false); + OS << ", align=" << Constants[i].getAlignment(); + OS << "\n"; + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineConstantPool::dump() const { print(dbgs()); } +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp new file mode 100644 index 0000000..338cd1e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp @@ -0,0 +1,60 @@ +//===-- MachineFunctionAnalysis.cpp ---------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionAnalysis members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionInitializer.h" +using namespace llvm; + +char MachineFunctionAnalysis::ID = 0; + +MachineFunctionAnalysis::MachineFunctionAnalysis( + const TargetMachine &tm, MachineFunctionInitializer *MFInitializer) + : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +MachineFunctionAnalysis::~MachineFunctionAnalysis() { + releaseMemory(); + assert(!MF && "MachineFunctionAnalysis left initialized!"); +} + +void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineModuleInfo>(); +} + +bool MachineFunctionAnalysis::doInitialization(Module &M) { + MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + assert(MMI && "MMI not around yet??"); + MMI->setModule(&M); + NextFnNum = 0; + return false; +} + + +bool MachineFunctionAnalysis::runOnFunction(Function &F) { + assert(!MF && "MachineFunctionAnalysis already initialized!"); + MF = new MachineFunction(&F, TM, NextFnNum++, + getAnalysis<MachineModuleInfo>()); + if (MFInitializer) + MFInitializer->initializeMachineFunction(*MF); + return false; +} + +void MachineFunctionAnalysis::releaseMemory() { + delete MF; + MF = nullptr; +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp new file mode 100644 index 0000000..05463fc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp @@ -0,0 +1,68 @@ +//===-- MachineFunctionPass.cpp -------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the definitions of the MachineFunctionPass members. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +using namespace llvm; + +Pass *MachineFunctionPass::createPrinterPass(raw_ostream &O, + const std::string &Banner) const { + return createMachineFunctionPrinterPass(O, Banner); +} + +bool MachineFunctionPass::runOnFunction(Function &F) { + // Do not codegen any 'available_externally' functions at all, they have + // definitions outside the translation unit. + if (F.hasAvailableExternallyLinkage()) + return false; + + MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF(); + return runOnMachineFunction(MF); +} + +void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineFunctionAnalysis>(); + AU.addPreserved<MachineFunctionAnalysis>(); + + // MachineFunctionPass preserves all LLVM IR passes, but there's no + // high-level way to express this. Instead, just list a bunch of + // passes explicitly. This does not include setPreservesCFG, + // because CodeGen overloads that to mean preserving the MachineBasicBlock + // CFG in addition to the LLVM IR CFG. + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<DominanceFrontier>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<IVUsers>(); + AU.addPreserved<LoopInfoWrapperPass>(); + AU.addPreserved<MemoryDependenceAnalysis>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreserved<StackProtector>(); + + FunctionPass::getAnalysisUsage(AU); +} diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp new file mode 100644 index 0000000..790f5ac --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -0,0 +1,67 @@ +//===-- MachineFunctionPrinterPass.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineFunctionPrinterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +/// MachineFunctionPrinterPass - This is a pass to dump the IR of a +/// MachineFunction. 
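+/// A minimal way to schedule it by hand, assuming a legacy PassManager PM is
+/// being populated for codegen, is roughly:
+///   PM.add(createMachineFunctionPrinterPass(dbgs(), "# After some pass"));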
+/// +struct MachineFunctionPrinterPass : public MachineFunctionPass { + static char ID; + + raw_ostream &OS; + const std::string Banner; + + MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { } + MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner) + : MachineFunctionPass(ID), OS(os), Banner(banner) {} + + const char *getPassName() const override { return "MachineFunction Printer"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + OS << "# " << Banner << ":\n"; + MF.print(OS, getAnalysisIfAvailable<SlotIndexes>()); + return false; + } +}; + +char MachineFunctionPrinterPass::ID = 0; +} + +char &llvm::MachineFunctionPrinterPassID = MachineFunctionPrinterPass::ID; +INITIALIZE_PASS(MachineFunctionPrinterPass, "machineinstr-printer", + "Machine Function Printer", false, false) + +namespace llvm { +/// Returns a newly-created MachineFunction Printer pass. The +/// default banner is empty. +/// +MachineFunctionPass *createMachineFunctionPrinterPass(raw_ostream &OS, + const std::string &Banner){ + return new MachineFunctionPrinterPass(OS, Banner); +} + +} diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp new file mode 100644 index 0000000..6b8eecc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -0,0 +1,2057 @@ +//===-- lib/CodeGen/MachineInstr.cpp --------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Methods common to all machine instructions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" +#include "llvm/MC/MCInstrDesc.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +static cl::opt<bool> PrintWholeRegMask( + "print-whole-regmask", + cl::desc("Print the full contents of regmask operands in IR dumps"), + cl::init(true), cl::Hidden); + +//===----------------------------------------------------------------------===// +// MachineOperand Implementation +//===----------------------------------------------------------------------===// + +void MachineOperand::setReg(unsigned Reg) { + if (getReg() == Reg) return; // No change. + + // Otherwise, we have to change the register. If this operand is embedded + // into a machine function, we need to update the old and new register's + // use/def lists. + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + SmallContents.RegNo = Reg; + MRI.addRegOperandToUseList(this); + return; + } + + // Otherwise, just change the register, no problem. :) + SmallContents.RegNo = Reg; +} + +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + // Note that getSubReg() may return 0 if the sub-register doesn't exist. + // That won't happen in legal code. + setSubReg(0); + } + setReg(Reg); +} + +/// Change a def to a use, or a use to a def. +void MachineOperand::setIsDef(bool Val) { + assert(isReg() && "Wrong MachineOperand accessor"); + assert((!Val || !isDebug()) && "Marking a debug operation as def"); + if (IsDef == Val) + return; + // MRI may keep uses and defs in different list positions. 
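+  // If this operand is currently on such a list, take it off before flipping
+  // IsDef and re-add it afterwards so it lands in the right position.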
+ if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + MRI.removeRegOperandFromUseList(this); + IsDef = Val; + MRI.addRegOperandToUseList(this); + return; + } + IsDef = Val; +} + +// If this operand is currently a register operand, and if this is in a +// function, deregister the operand from the register's use/def list. +void MachineOperand::removeRegFromUses() { + if (!isReg() || !isOnRegUseList()) + return; + + if (MachineInstr *MI = getParent()) { + if (MachineBasicBlock *MBB = MI->getParent()) { + if (MachineFunction *MF = MBB->getParent()) + MF->getRegInfo().removeRegOperandFromUseList(this); + } + } +} + +/// ChangeToImmediate - Replace this operand with a new immediate operand of +/// the specified value. If an operand is known to be an immediate already, +/// the setImm method should be used. +void MachineOperand::ChangeToImmediate(int64_t ImmVal) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_Immediate; + Contents.ImmVal = ImmVal; +} + +void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) { + assert((!isReg() || !isTied()) && "Cannot change a tied operand into an imm"); + + removeRegFromUses(); + + OpKind = MO_FPImmediate; + Contents.CFP = FPImm; +} + +void MachineOperand::ChangeToES(const char *SymName, unsigned char TargetFlags) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an external symbol"); + + removeRegFromUses(); + + OpKind = MO_ExternalSymbol; + Contents.OffsetedInfo.Val.SymbolName = SymName; + setOffset(0); // Offset is always 0. + setTargetFlags(TargetFlags); +} + +void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) { + assert((!isReg() || !isTied()) && + "Cannot change a tied operand into an MCSymbol"); + + removeRegFromUses(); + + OpKind = MO_MCSymbol; + Contents.Sym = Sym; +} + +/// ChangeToRegister - Replace this operand with a new register operand of +/// the specified value. If an operand is known to be an register already, +/// the setReg method should be used. +void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, + bool isKill, bool isDead, bool isUndef, + bool isDebug) { + MachineRegisterInfo *RegInfo = nullptr; + if (MachineInstr *MI = getParent()) + if (MachineBasicBlock *MBB = MI->getParent()) + if (MachineFunction *MF = MBB->getParent()) + RegInfo = &MF->getRegInfo(); + // If this operand is already a register operand, remove it from the + // register's use/def lists. + bool WasReg = isReg(); + if (RegInfo && WasReg) + RegInfo->removeRegOperandFromUseList(this); + + // Change this to a register and set the reg#. + OpKind = MO_Register; + SmallContents.RegNo = Reg; + SubReg_TargetFlags = 0; + IsDef = isDef; + IsImp = isImp; + IsKill = isKill; + IsDead = isDead; + IsUndef = isUndef; + IsInternalRead = false; + IsEarlyClobber = false; + IsDebug = isDebug; + // Ensure isOnRegUseList() returns false. + Contents.Reg.Prev = nullptr; + // Preserve the tie when the operand was already a register. + if (!WasReg) + TiedTo = 0; + + // If this operand is embedded in a function, add the operand to the + // register's use/def list. + if (RegInfo) + RegInfo->addRegOperandToUseList(this); +} + +/// isIdenticalTo - Return true if this operand is identical to the specified +/// operand. Note that this should stay in sync with the hash_value overload +/// below. 
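+/// For register operands only the register, the def/use sense and the
+/// sub-register index are compared; kill, dead and undef flags are
+/// deliberately ignored.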
+bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const { + if (getType() != Other.getType() || + getTargetFlags() != Other.getTargetFlags()) + return false; + + switch (getType()) { + case MachineOperand::MO_Register: + return getReg() == Other.getReg() && isDef() == Other.isDef() && + getSubReg() == Other.getSubReg(); + case MachineOperand::MO_Immediate: + return getImm() == Other.getImm(); + case MachineOperand::MO_CImmediate: + return getCImm() == Other.getCImm(); + case MachineOperand::MO_FPImmediate: + return getFPImm() == Other.getFPImm(); + case MachineOperand::MO_MachineBasicBlock: + return getMBB() == Other.getMBB(); + case MachineOperand::MO_FrameIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return getIndex() == Other.getIndex() && getOffset() == Other.getOffset(); + case MachineOperand::MO_JumpTableIndex: + return getIndex() == Other.getIndex(); + case MachineOperand::MO_GlobalAddress: + return getGlobal() == Other.getGlobal() && getOffset() == Other.getOffset(); + case MachineOperand::MO_ExternalSymbol: + return !strcmp(getSymbolName(), Other.getSymbolName()) && + getOffset() == Other.getOffset(); + case MachineOperand::MO_BlockAddress: + return getBlockAddress() == Other.getBlockAddress() && + getOffset() == Other.getOffset(); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return getRegMask() == Other.getRegMask(); + case MachineOperand::MO_MCSymbol: + return getMCSymbol() == Other.getMCSymbol(); + case MachineOperand::MO_CFIIndex: + return getCFIIndex() == Other.getCFIIndex(); + case MachineOperand::MO_Metadata: + return getMetadata() == Other.getMetadata(); + } + llvm_unreachable("Invalid machine operand type"); +} + +// Note: this must stay exactly in sync with isIdenticalTo above. +hash_code llvm::hash_value(const MachineOperand &MO) { + switch (MO.getType()) { + case MachineOperand::MO_Register: + // Register operands don't have target flags. 
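+    // Hash exactly the fields isIdenticalTo() compares (reg, subreg, def)
+    // so that operands which compare identical always hash the same.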
+ return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef()); + case MachineOperand::MO_Immediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm()); + case MachineOperand::MO_CImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCImm()); + case MachineOperand::MO_FPImmediate: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getFPImm()); + case MachineOperand::MO_MachineBasicBlock: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMBB()); + case MachineOperand::MO_FrameIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_TargetIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex(), + MO.getOffset()); + case MachineOperand::MO_JumpTableIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex()); + case MachineOperand::MO_ExternalSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(), + MO.getSymbolName()); + case MachineOperand::MO_GlobalAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(), + MO.getOffset()); + case MachineOperand::MO_BlockAddress: + return hash_combine(MO.getType(), MO.getTargetFlags(), + MO.getBlockAddress(), MO.getOffset()); + case MachineOperand::MO_RegisterMask: + case MachineOperand::MO_RegisterLiveOut: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getRegMask()); + case MachineOperand::MO_Metadata: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMetadata()); + case MachineOperand::MO_MCSymbol: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol()); + case MachineOperand::MO_CFIIndex: + return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex()); + } + llvm_unreachable("Invalid machine operand type"); +} + +void MachineOperand::print(raw_ostream &OS, + const TargetRegisterInfo *TRI) const { + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST, TRI); +} + +void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST, + const TargetRegisterInfo *TRI) const { + switch (getType()) { + case MachineOperand::MO_Register: + OS << PrintReg(getReg(), TRI, getSubReg()); + + if (isDef() || isKill() || isDead() || isImplicit() || isUndef() || + isInternalRead() || isEarlyClobber() || isTied()) { + OS << '<'; + bool NeedComma = false; + if (isDef()) { + if (NeedComma) OS << ','; + if (isEarlyClobber()) + OS << "earlyclobber,"; + if (isImplicit()) + OS << "imp-"; + OS << "def"; + NeedComma = true; + // <def,read-undef> only makes sense when getSubReg() is set. + // Don't clutter the output otherwise. 
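+      // Such an operand prints as, e.g., something like
+      // %vreg5:sub_32<def,read-undef>.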
+ if (isUndef() && getSubReg()) + OS << ",read-undef"; + } else if (isImplicit()) { + OS << "imp-use"; + NeedComma = true; + } + + if (isKill()) { + if (NeedComma) OS << ','; + OS << "kill"; + NeedComma = true; + } + if (isDead()) { + if (NeedComma) OS << ','; + OS << "dead"; + NeedComma = true; + } + if (isUndef() && isUse()) { + if (NeedComma) OS << ','; + OS << "undef"; + NeedComma = true; + } + if (isInternalRead()) { + if (NeedComma) OS << ','; + OS << "internal"; + NeedComma = true; + } + if (isTied()) { + if (NeedComma) OS << ','; + OS << "tied"; + if (TiedTo != 15) + OS << unsigned(TiedTo - 1); + } + OS << '>'; + } + break; + case MachineOperand::MO_Immediate: + OS << getImm(); + break; + case MachineOperand::MO_CImmediate: + getCImm()->getValue().print(OS, false); + break; + case MachineOperand::MO_FPImmediate: + if (getFPImm()->getType()->isFloatTy()) + OS << getFPImm()->getValueAPF().convertToFloat(); + else + OS << getFPImm()->getValueAPF().convertToDouble(); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << "<BB#" << getMBB()->getNumber() << ">"; + break; + case MachineOperand::MO_FrameIndex: + OS << "<fi#" << getIndex() << '>'; + break; + case MachineOperand::MO_ConstantPoolIndex: + OS << "<cp#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_TargetIndex: + OS << "<ti#" << getIndex(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_JumpTableIndex: + OS << "<jt#" << getIndex() << '>'; + break; + case MachineOperand::MO_GlobalAddress: + OS << "<ga:"; + getGlobal()->printAsOperand(OS, /*PrintType=*/false, MST); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_ExternalSymbol: + OS << "<es:" << getSymbolName(); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_BlockAddress: + OS << '<'; + getBlockAddress()->printAsOperand(OS, /*PrintType=*/false, MST); + if (getOffset()) OS << "+" << getOffset(); + OS << '>'; + break; + case MachineOperand::MO_RegisterMask: { + unsigned NumRegsInMask = 0; + unsigned NumRegsEmitted = 0; + OS << "<regmask"; + for (unsigned i = 0; i < TRI->getNumRegs(); ++i) { + unsigned MaskWord = i / 32; + unsigned MaskBit = i % 32; + if (getRegMask()[MaskWord] & (1 << MaskBit)) { + if (PrintWholeRegMask || NumRegsEmitted <= 10) { + OS << " " << PrintReg(i, TRI); + NumRegsEmitted++; + } + NumRegsInMask++; + } + } + if (NumRegsEmitted != NumRegsInMask) + OS << " and " << (NumRegsInMask - NumRegsEmitted) << " more..."; + OS << ">"; + break; + } + case MachineOperand::MO_RegisterLiveOut: + OS << "<regliveout>"; + break; + case MachineOperand::MO_Metadata: + OS << '<'; + getMetadata()->printAsOperand(OS, MST); + OS << '>'; + break; + case MachineOperand::MO_MCSymbol: + OS << "<MCSym=" << *getMCSymbol() << '>'; + break; + case MachineOperand::MO_CFIIndex: + OS << "<call frame instruction>"; + break; + } + + if (unsigned TF = getTargetFlags()) + OS << "[TF=" << TF << ']'; +} + +//===----------------------------------------------------------------------===// +// MachineMemOperand Implementation +//===----------------------------------------------------------------------===// + +/// getAddrSpace - Return the LLVM IR address space number that this pointer +/// points into. 
+unsigned MachinePointerInfo::getAddrSpace() const { + if (V.isNull() || V.is<const PseudoSourceValue*>()) return 0; + return cast<PointerType>(V.get<const Value*>()->getType())->getAddressSpace(); +} + +/// getConstantPool - Return a MachinePointerInfo record that refers to the +/// constant pool. +MachinePointerInfo MachinePointerInfo::getConstantPool(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getConstantPool()); +} + +/// getFixedStack - Return a MachinePointerInfo record that refers to the +/// the specified FrameIndex. +MachinePointerInfo MachinePointerInfo::getFixedStack(MachineFunction &MF, + int FI, int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getFixedStack(FI), Offset); +} + +MachinePointerInfo MachinePointerInfo::getJumpTable(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getJumpTable()); +} + +MachinePointerInfo MachinePointerInfo::getGOT(MachineFunction &MF) { + return MachinePointerInfo(MF.getPSVManager().getGOT()); +} + +MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF, + int64_t Offset) { + return MachinePointerInfo(MF.getPSVManager().getStack(), Offset); +} + +MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f, + uint64_t s, unsigned int a, + const AAMDNodes &AAInfo, + const MDNode *Ranges) + : PtrInfo(ptrinfo), Size(s), + Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)), + AAInfo(AAInfo), Ranges(Ranges) { + assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() || + isa<PointerType>(PtrInfo.V.get<const Value*>()->getType())) && + "invalid pointer value"); + assert(getBaseAlignment() == a && "Alignment is not a power of 2!"); + assert((isLoad() || isStore()) && "Not a load/store!"); +} + +/// Profile - Gather unique data for the object. +/// +void MachineMemOperand::Profile(FoldingSetNodeID &ID) const { + ID.AddInteger(getOffset()); + ID.AddInteger(Size); + ID.AddPointer(getOpaqueValue()); + ID.AddInteger(Flags); +} + +void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) { + // The Value and Offset may differ due to CSE. But the flags and size + // should be the same. + assert(MMO->getFlags() == getFlags() && "Flags mismatch!"); + assert(MMO->getSize() == getSize() && "Size mismatch!"); + + if (MMO->getBaseAlignment() >= getBaseAlignment()) { + // Update the alignment value. + Flags = (Flags & ((1 << MOMaxBits) - 1)) | + ((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits); + // Also update the base and offset, because the new alignment may + // not be applicable with the old ones. + PtrInfo = MMO->PtrInfo; + } +} + +/// getAlignment - Return the minimum known alignment in bytes of the +/// actual memory reference. +uint64_t MachineMemOperand::getAlignment() const { + return MinAlign(getBaseAlignment(), getOffset()); +} + +void MachineMemOperand::print(raw_ostream &OS) const { + ModuleSlotTracker DummyMST(nullptr); + print(OS, DummyMST); +} +void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { + assert((isLoad() || isStore()) && + "SV has to be a load, store or both."); + + if (isVolatile()) + OS << "Volatile "; + + if (isLoad()) + OS << "LD"; + if (isStore()) + OS << "ST"; + OS << getSize(); + + // Print the address information. 
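+  // The result looks something like LD4[%ptr+8](align=4), followed by any
+  // TBAA, alias-scope, noalias, nontemporal and invariant annotations.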
+ OS << "["; + if (const Value *V = getValue()) + V->printAsOperand(OS, /*PrintType=*/false, MST); + else if (const PseudoSourceValue *PSV = getPseudoValue()) + PSV->printCustom(OS); + else + OS << "<unknown>"; + + unsigned AS = getAddrSpace(); + if (AS != 0) + OS << "(addrspace=" << AS << ')'; + + // If the alignment of the memory reference itself differs from the alignment + // of the base pointer, print the base alignment explicitly, next to the base + // pointer. + if (getBaseAlignment() != getAlignment()) + OS << "(align=" << getBaseAlignment() << ")"; + + if (getOffset() != 0) + OS << "+" << getOffset(); + OS << "]"; + + // Print the alignment of the reference. + if (getBaseAlignment() != getAlignment() || getBaseAlignment() != getSize()) + OS << "(align=" << getAlignment() << ")"; + + // Print TBAA info. + if (const MDNode *TBAAInfo = getAAInfo().TBAA) { + OS << "(tbaa="; + if (TBAAInfo->getNumOperands() > 0) + TBAAInfo->getOperand(0)->printAsOperand(OS, MST); + else + OS << "<unknown>"; + OS << ")"; + } + + // Print AA scope info. + if (const MDNode *ScopeInfo = getAAInfo().Scope) { + OS << "(alias.scope="; + if (ScopeInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = ScopeInfo->getNumOperands(); i != ie; ++i) { + ScopeInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie-1) + OS << ","; + } + else + OS << "<unknown>"; + OS << ")"; + } + + // Print AA noalias scope info. + if (const MDNode *NoAliasInfo = getAAInfo().NoAlias) { + OS << "(noalias="; + if (NoAliasInfo->getNumOperands() > 0) + for (unsigned i = 0, ie = NoAliasInfo->getNumOperands(); i != ie; ++i) { + NoAliasInfo->getOperand(i)->printAsOperand(OS, MST); + if (i != ie-1) + OS << ","; + } + else + OS << "<unknown>"; + OS << ")"; + } + + // Print nontemporal info. + if (isNonTemporal()) + OS << "(nontemporal)"; + + if (isInvariant()) + OS << "(invariant)"; +} + +//===----------------------------------------------------------------------===// +// MachineInstr Implementation +//===----------------------------------------------------------------------===// + +void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { + if (MCID->ImplicitDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) + addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); + if (MCID->ImplicitUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) + addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); +} + +/// MachineInstr ctor - This constructor creates a MachineInstr and adds the +/// implicit operands. It reserves space for the number of operands specified by +/// the MCInstrDesc. +MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, + DebugLoc dl, bool NoImp) + : MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0), + AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr), + debugLoc(std::move(dl)) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + + // Reserve space for the expected number of operands. 
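+  // The capacity accounts for the implicit defs and uses as well, so
+  // addImplicitDefUseOperands() below normally does not force a reallocation.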
+ if (unsigned NumOps = MCID->getNumOperands() + + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { + CapOperands = OperandCapacity::get(NumOps); + Operands = MF.allocateOperandArray(CapOperands); + } + + if (!NoImp) + addImplicitDefUseOperands(MF); +} + +/// MachineInstr ctor - Copies MachineInstr arg exactly +/// +MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) + : MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0), + Flags(0), AsmPrinterFlags(0), + NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), + debugLoc(MI.getDebugLoc()) { + assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor"); + + CapOperands = OperandCapacity::get(MI.getNumOperands()); + Operands = MF.allocateOperandArray(CapOperands); + + // Copy operands. + for (const MachineOperand &MO : MI.operands()) + addOperand(MF, MO); + + // Copy all the sensible flags. + setFlags(MI.Flags); +} + +/// getRegInfo - If this instruction is embedded into a MachineFunction, +/// return the MachineRegisterInfo object for the current function, otherwise +/// return null. +MachineRegisterInfo *MachineInstr::getRegInfo() { + if (MachineBasicBlock *MBB = getParent()) + return &MBB->getParent()->getRegInfo(); + return nullptr; +} + +/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands already be on their use lists. +void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.removeRegOperandFromUseList(&MO); +} + +/// AddRegOperandsToUseLists - Add all of the register operands in +/// this instruction from their respective use lists. This requires that the +/// operands not be on their use lists yet. +void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { + for (MachineOperand &MO : operands()) + if (MO.isReg()) + MRI.addRegOperandToUseList(&MO); +} + +void MachineInstr::addOperand(const MachineOperand &Op) { + MachineBasicBlock *MBB = getParent(); + assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs"); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs"); + addOperand(*MF, Op); +} + +/// Move NumOps MachineOperands from Src to Dst, with support for overlapping +/// ranges. If MRI is non-null also update use-def chains. +static void moveOperands(MachineOperand *Dst, MachineOperand *Src, + unsigned NumOps, MachineRegisterInfo *MRI) { + if (MRI) + return MRI->moveOperands(Dst, Src, NumOps); + + // MachineOperand is a trivially copyable type so we can just use memmove. + std::memmove(Dst, Src, NumOps * sizeof(MachineOperand)); +} + +/// addOperand - Add the specified operand to the instruction. If it is an +/// implicit operand, it is added to the end of the operand list. If it is +/// an explicit operand it is added at the end of the explicit operand list +/// (before the first implicit operand). +void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { + assert(MCID && "Cannot add operands before providing an instr descriptor"); + + // Check if we're adding one of our existing operands. + if (&Op >= Operands && &Op < Operands + NumOperands) { + // This is unusual: MI->addOperand(MI->getOperand(i)). + // If adding Op requires reallocating or moving existing operands around, + // the Op reference could go stale. Support it by copying Op. 
+ MachineOperand CopyOp(Op); + return addOperand(MF, CopyOp); + } + + // Find the insert location for the new operand. Implicit registers go at + // the end, everything else goes before the implicit regs. + // + // FIXME: Allow mixed explicit and implicit operands on inline asm. + // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as + // implicit-defs, but they must not be moved around. See the FIXME in + // InstrEmitter.cpp. + unsigned OpNo = getNumOperands(); + bool isImpReg = Op.isReg() && Op.isImplicit(); + if (!isImpReg && !isInlineAsm()) { + while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { + --OpNo; + assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); + } + } + +#ifndef NDEBUG + bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; + // OpNo now points as the desired insertion point. Unless this is a variadic + // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). + // RegMask operands go between the explicit and implicit operands. + assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || + OpNo < MCID->getNumOperands() || isMetaDataOp) && + "Trying to add an operand to a machine instr that is already done!"); +#endif + + MachineRegisterInfo *MRI = getRegInfo(); + + // Determine if the Operands array needs to be reallocated. + // Save the old capacity and operand array. + OperandCapacity OldCap = CapOperands; + MachineOperand *OldOperands = Operands; + if (!OldOperands || OldCap.getSize() == getNumOperands()) { + CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1); + Operands = MF.allocateOperandArray(CapOperands); + // Move the operands before the insertion point. + if (OpNo) + moveOperands(Operands, OldOperands, OpNo, MRI); + } + + // Move the operands following the insertion point. + if (OpNo != NumOperands) + moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo, + MRI); + ++NumOperands; + + // Deallocate the old operand array. + if (OldOperands != Operands && OldOperands) + MF.deallocateOperandArray(OldCap, OldOperands); + + // Copy Op into place. It still needs to be inserted into the MRI use lists. + MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op); + NewMO->ParentMI = this; + + // When adding a register operand, tell MRI about it. + if (NewMO->isReg()) { + // Ensure isOnRegUseList() returns false, regardless of Op's status. + NewMO->Contents.Reg.Prev = nullptr; + // Ignore existing ties. This is not a property that can be copied. + NewMO->TiedTo = 0; + // Add the new operand to MRI, but only for instructions in an MBB. + if (MRI) + MRI->addRegOperandToUseList(NewMO); + // The MCID operand information isn't accurate until we start adding + // explicit operands. The implicit operands are added first, then the + // explicits are inserted before them. + if (!isImpReg) { + // Tie uses to defs as indicated in MCInstrDesc. + if (NewMO->isUse()) { + int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); + if (DefIdx != -1) + tieOperands(DefIdx, OpNo); + } + // If the register operand is flagged as early, mark the operand as such. + if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) + NewMO->setIsEarlyClobber(true); + } + } +} + +/// RemoveOperand - Erase an operand from an instruction, leaving it with one +/// fewer operand than it started with. 
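+/// All operands after OpNo are shifted down one slot, so operand indices and
+/// MachineOperand pointers cached by the caller become stale.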
+/// +void MachineInstr::RemoveOperand(unsigned OpNo) { + assert(OpNo < getNumOperands() && "Invalid operand number"); + untieRegOperand(OpNo); + +#ifndef NDEBUG + // Moving tied operands would break the ties. + for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i) + if (Operands[i].isReg()) + assert(!Operands[i].isTied() && "Cannot move tied operands"); +#endif + + MachineRegisterInfo *MRI = getRegInfo(); + if (MRI && Operands[OpNo].isReg()) + MRI->removeRegOperandFromUseList(Operands + OpNo); + + // Don't call the MachineOperand destructor. A lot of this code depends on + // MachineOperand having a trivial destructor anyway, and adding a call here + // wouldn't make it 'destructor-correct'. + + if (unsigned N = NumOperands - 1 - OpNo) + moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI); + --NumOperands; +} + +/// addMemOperand - Add a MachineMemOperand to the machine instruction. +/// This function should be used only occasionally. The setMemRefs function +/// is the primary method for setting up a MachineInstr's MemRefs list. +void MachineInstr::addMemOperand(MachineFunction &MF, + MachineMemOperand *MO) { + mmo_iterator OldMemRefs = MemRefs; + unsigned OldNumMemRefs = NumMemRefs; + + unsigned NewNum = NumMemRefs + 1; + mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); + + std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); + NewMemRefs[NewNum - 1] = MO; + setMemRefs(NewMemRefs, NewMemRefs + NewNum); +} + +std::pair<MachineInstr::mmo_iterator, unsigned> +MachineInstr::mergeMemRefsWith(const MachineInstr& Other) { + // TODO: If we end up with too many memory operands, return the empty + // conservative set rather than failing asserts. + // TODO: consider uniquing elements within the operand lists to reduce + // space usage and fall back to conservative information less often. + size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin()) + + (Other.memoperands_end() - Other.memoperands_begin()); + + MachineFunction *MF = getParent()->getParent(); + mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs); + mmo_iterator MemEnd = std::copy(memoperands_begin(), memoperands_end(), + MemBegin); + MemEnd = std::copy(Other.memoperands_begin(), Other.memoperands_end(), + MemEnd); + assert(MemEnd - MemBegin == (ptrdiff_t)CombinedNumMemRefs && + "missing memrefs"); + + return std::make_pair(MemBegin, CombinedNumMemRefs); +} + +bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { + assert(!isBundledWithPred() && "Must be called on bundle header"); + for (MachineBasicBlock::const_instr_iterator MII = getIterator();; ++MII) { + if (MII->getDesc().getFlags() & Mask) { + if (Type == AnyInBundle) + return true; + } else { + if (Type == AllInBundle && !MII->isBundle()) + return false; + } + // This was the last instruction in the bundle. + if (!MII->isBundledWithSucc()) + return Type == AllInBundle; + } +} + +bool MachineInstr::isIdenticalTo(const MachineInstr *Other, + MICheckType Check) const { + // If opcodes or number of operands are not the same then the two + // instructions are obviously not identical. + if (Other->getOpcode() != getOpcode() || + Other->getNumOperands() != getNumOperands()) + return false; + + if (isBundle()) { + // Both instructions are bundles, compare MIs inside the bundle. 
+ MachineBasicBlock::const_instr_iterator I1 = getIterator(); + MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end(); + MachineBasicBlock::const_instr_iterator I2 = Other->getIterator(); + MachineBasicBlock::const_instr_iterator E2= Other->getParent()->instr_end(); + while (++I1 != E1 && I1->isInsideBundle()) { + ++I2; + if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(&*I2, Check)) + return false; + } + } + + // Check operands to make sure they match. + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + const MachineOperand &OMO = Other->getOperand(i); + if (!MO.isReg()) { + if (!MO.isIdenticalTo(OMO)) + return false; + continue; + } + + // Clients may or may not want to ignore defs when testing for equality. + // For example, machine CSE pass only cares about finding common + // subexpressions, so it's safe to ignore virtual register defs. + if (MO.isDef()) { + if (Check == IgnoreDefs) + continue; + else if (Check == IgnoreVRegDefs) { + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + TargetRegisterInfo::isPhysicalRegister(OMO.getReg())) + if (MO.getReg() != OMO.getReg()) + return false; + } else { + if (!MO.isIdenticalTo(OMO)) + return false; + if (Check == CheckKillDead && MO.isDead() != OMO.isDead()) + return false; + } + } else { + if (!MO.isIdenticalTo(OMO)) + return false; + if (Check == CheckKillDead && MO.isKill() != OMO.isKill()) + return false; + } + } + // If DebugLoc does not match then two dbg.values are not identical. + if (isDebugValue()) + if (getDebugLoc() && Other->getDebugLoc() && + getDebugLoc() != Other->getDebugLoc()) + return false; + return true; +} + +MachineInstr *MachineInstr::removeFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + return getParent()->remove(this); +} + +MachineInstr *MachineInstr::removeFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + return getParent()->remove_instr(this); +} + +void MachineInstr::eraseFromParent() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase(this); +} + +void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() { + assert(getParent() && "Not embedded in a basic block!"); + MachineBasicBlock *MBB = getParent(); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Not embedded in a function!"); + + MachineInstr *MI = (MachineInstr *)this; + MachineRegisterInfo &MRI = MF->getRegInfo(); + + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MRI.markUsesInDebugValueAsUndef(Reg); + } + MI->eraseFromParent(); +} + +void MachineInstr::eraseFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase_instr(this); +} + +/// getNumExplicitOperands - Returns the number of non-implicit operands. 
+/// +unsigned MachineInstr::getNumExplicitOperands() const { + unsigned NumOperands = MCID->getNumOperands(); + if (!MCID->isVariadic()) + return NumOperands; + + for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isImplicit()) + NumOperands++; + } + return NumOperands; +} + +void MachineInstr::bundleWithPred() { + assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); + setFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = getIterator(); + --Pred; + assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->setFlag(BundledSucc); +} + +void MachineInstr::bundleWithSucc() { + assert(!isBundledWithSucc() && "MI is already bundled with its successor"); + setFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = getIterator(); + ++Succ; + assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->setFlag(BundledPred); +} + +void MachineInstr::unbundleFromPred() { + assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); + clearFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = getIterator(); + --Pred; + assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->clearFlag(BundledSucc); +} + +void MachineInstr::unbundleFromSucc() { + assert(isBundledWithSucc() && "MI isn't bundled with its successor"); + clearFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = getIterator(); + ++Succ; + assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->clearFlag(BundledPred); +} + +bool MachineInstr::isStackAligningInlineAsm() const { + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + return true; + } + return false; +} + +InlineAsm::AsmDialect MachineInstr::getInlineAsmDialect() const { + assert(isInlineAsm() && "getInlineAsmDialect() only works for inline asms!"); + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + return InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect) != 0); +} + +int MachineInstr::findInlineAsmFlagIdx(unsigned OpIdx, + unsigned *GroupNo) const { + assert(isInlineAsm() && "Expected an inline asm instruction"); + assert(OpIdx < getNumOperands() && "OpIdx out of range"); + + // Ignore queries about the initial operands. + if (OpIdx < InlineAsm::MIOp_FirstOperand) + return -1; + + unsigned Group = 0; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + // If we reach the implicit register operands, stop looking. + if (!FlagMO.isImm()) + return -1; + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + if (i + NumOps > OpIdx) { + if (GroupNo) + *GroupNo = Group; + return i; + } + ++Group; + } + return -1; +} + +const TargetRegisterClass* +MachineInstr::getRegClassConstraint(unsigned OpIdx, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) const { + assert(getParent() && "Can't have an MBB reference here!"); + assert(getParent()->getParent() && "Can't have an MF reference here!"); + const MachineFunction &MF = *getParent()->getParent(); + + // Most opcodes have fixed constraints in their MCInstrDesc. + if (!isInlineAsm()) + return TII->getRegClass(getDesc(), OpIdx, TRI, MF); + + if (!getOperand(OpIdx).isReg()) + return nullptr; + + // For tied uses on inline asm, get the constraint from the def. 
+ unsigned DefIdx; + if (getOperand(OpIdx).isUse() && isRegTiedToDefOperand(OpIdx, &DefIdx)) + OpIdx = DefIdx; + + // Inline asm stores register class constraints in the flag word. + int FlagIdx = findInlineAsmFlagIdx(OpIdx); + if (FlagIdx < 0) + return nullptr; + + unsigned Flag = getOperand(FlagIdx).getImm(); + unsigned RCID; + if (InlineAsm::hasRegClassConstraint(Flag, RCID)) + return TRI->getRegClass(RCID); + + // Assume that all registers in a memory operand are pointers. + if (InlineAsm::getKind(Flag) == InlineAsm::Kind_Mem) + return TRI->getPointerRegClass(MF); + + return nullptr; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg( + unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, bool ExploreBundle) const { + // Check every operands inside the bundle if we have + // been asked to. + if (ExploreBundle) + for (ConstMIBundleOperands OpndIt(this); OpndIt.isValid() && CurRC; + ++OpndIt) + CurRC = OpndIt->getParent()->getRegClassConstraintEffectForVRegImpl( + OpndIt.getOperandNo(), Reg, CurRC, TII, TRI); + else + // Otherwise, just check the current operands. + for (unsigned i = 0, e = NumOperands; i < e && CurRC; ++i) + CurRC = getRegClassConstraintEffectForVRegImpl(i, Reg, CurRC, TII, TRI); + return CurRC; +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl( + unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + assert(CurRC && "Invalid initial register class"); + // Check if Reg is constrained by some of its use/def from MI. + const MachineOperand &MO = getOperand(OpIdx); + if (!MO.isReg() || MO.getReg() != Reg) + return CurRC; + // If yes, accumulate the constraints through the operand. + return getRegClassConstraintEffect(OpIdx, CurRC, TII, TRI); +} + +const TargetRegisterClass *MachineInstr::getRegClassConstraintEffect( + unsigned OpIdx, const TargetRegisterClass *CurRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const { + const TargetRegisterClass *OpRC = getRegClassConstraint(OpIdx, TII, TRI); + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isReg() && + "Cannot get register constraints for non-register operand"); + assert(CurRC && "Invalid initial register class"); + if (unsigned SubIdx = MO.getSubReg()) { + if (OpRC) + CurRC = TRI->getMatchingSuperRegClass(CurRC, OpRC, SubIdx); + else + CurRC = TRI->getSubClassWithSubReg(CurRC, SubIdx); + } else if (OpRC) + CurRC = TRI->getCommonSubClass(CurRC, OpRC); + return CurRC; +} + +/// Return the number of instructions inside the MI bundle, not counting the +/// header instruction. +unsigned MachineInstr::getBundleSize() const { + MachineBasicBlock::const_instr_iterator I = getIterator(); + unsigned Size = 0; + while (I->isBundledWithSucc()) + ++Size, ++I; + return Size; +} + +/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of +/// the specific register or -1 if it is not found. It further tightens +/// the search criteria to a use that kills the register if isKill is true. 
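+/// The return value is an operand index, not a pointer. If TargetRegisterInfo
+/// is non-null, a use of a super-register of Reg also counts as a use of Reg.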
+int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill, + const TargetRegisterInfo *TRI) const { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MOReg == Reg || + (TRI && + TargetRegisterInfo::isPhysicalRegister(MOReg) && + TargetRegisterInfo::isPhysicalRegister(Reg) && + TRI->isSubRegister(MOReg, Reg))) + if (!isKill || MO.isKill()) + return i; + } + return -1; +} + +/// readsWritesVirtualRegister - Return a pair of bools (reads, writes) +/// indicating if this instruction reads or writes Reg. This also considers +/// partial defines. +std::pair<bool,bool> +MachineInstr::readsWritesVirtualRegister(unsigned Reg, + SmallVectorImpl<unsigned> *Ops) const { + bool PartDef = false; // Partial redefine. + bool FullDef = false; // Full define. + bool Use = false; + + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + if (Ops) + Ops->push_back(i); + if (MO.isUse()) + Use |= !MO.isUndef(); + else if (MO.getSubReg() && !MO.isUndef()) + // A partial <def,undef> doesn't count as reading the register. + PartDef = true; + else + FullDef = true; + } + // A partial redefine uses Reg unless there is also a full define. + return std::make_pair(Use || (PartDef && !FullDef), PartDef || FullDef); +} + +/// findRegisterDefOperandIdx() - Returns the operand index that is a def of +/// the specified register or -1 if it is not found. If isDead is true, defs +/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it +/// also checks if there is a def of a super-register. +int +MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap, + const TargetRegisterInfo *TRI) const { + bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + // Accept regmask operands when Overlap is set. + // Ignore them when looking for a specific def operand (Overlap == false). + if (isPhys && Overlap && MO.isRegMask() && MO.clobbersPhysReg(Reg)) + return i; + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + bool Found = (MOReg == Reg); + if (!Found && TRI && isPhys && + TargetRegisterInfo::isPhysicalRegister(MOReg)) { + if (Overlap) + Found = TRI->regsOverlap(MOReg, Reg); + else + Found = TRI->isSubRegister(MOReg, Reg); + } + if (Found && (!isDead || MO.isDead())) + return i; + } + return -1; +} + +/// findFirstPredOperandIdx() - Find the index of the first operand in the +/// operand list that is used to represent the predicate. It returns -1 if +/// none is found. +int MachineInstr::findFirstPredOperandIdx() const { + // Don't call MCID.findFirstPredOperandIdx() because this variant + // is sometimes called on an instruction that's not yet complete, and + // so the number of operands is less than the MCID indicates. In + // particular, the PTX target does this. + const MCInstrDesc &MCID = getDesc(); + if (MCID.isPredicable()) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (MCID.OpInfo[i].isPredicate()) + return i; + } + + return -1; +} + +// MachineOperand::TiedTo is 4 bits wide. +const unsigned TiedMax = 15; + +/// tieOperands - Mark operands at DefIdx and UseIdx as tied to each other. 
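+/// (For example, after tieOperands(0, 2) on a normal instruction,
+/// getOperand(2).TiedTo becomes 1 and getOperand(0).TiedTo becomes 3; the
+/// encoding is described below.)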
+/// +/// Use and def operands can be tied together, indicated by a non-zero TiedTo +/// field. TiedTo can have these values: +/// +/// 0: Operand is not tied to anything. +/// 1 to TiedMax-1: Tied to getOperand(TiedTo-1). +/// TiedMax: Tied to an operand >= TiedMax-1. +/// +/// The tied def must be one of the first TiedMax operands on a normal +/// instruction. INLINEASM instructions allow more tied defs. +/// +void MachineInstr::tieOperands(unsigned DefIdx, unsigned UseIdx) { + MachineOperand &DefMO = getOperand(DefIdx); + MachineOperand &UseMO = getOperand(UseIdx); + assert(DefMO.isDef() && "DefIdx must be a def operand"); + assert(UseMO.isUse() && "UseIdx must be a use operand"); + assert(!DefMO.isTied() && "Def is already tied to another use"); + assert(!UseMO.isTied() && "Use is already tied to another def"); + + if (DefIdx < TiedMax) + UseMO.TiedTo = DefIdx + 1; + else { + // Inline asm can use the group descriptors to find tied operands, but on + // normal instruction, the tied def must be within the first TiedMax + // operands. + assert(isInlineAsm() && "DefIdx out of range"); + UseMO.TiedTo = TiedMax; + } + + // UseIdx can be out of range, we'll search for it in findTiedOperandIdx(). + DefMO.TiedTo = std::min(UseIdx + 1, TiedMax); +} + +/// Given the index of a tied register operand, find the operand it is tied to. +/// Defs are tied to uses and vice versa. Returns the index of the tied operand +/// which must exist. +unsigned MachineInstr::findTiedOperandIdx(unsigned OpIdx) const { + const MachineOperand &MO = getOperand(OpIdx); + assert(MO.isTied() && "Operand isn't tied"); + + // Normally TiedTo is in range. + if (MO.TiedTo < TiedMax) + return MO.TiedTo - 1; + + // Uses on normal instructions can be out of range. + if (!isInlineAsm()) { + // Normal tied defs must be in the 0..TiedMax-1 range. + if (MO.isUse()) + return TiedMax - 1; + // MO is a def. Search for the tied use. + for (unsigned i = TiedMax - 1, e = getNumOperands(); i != e; ++i) { + const MachineOperand &UseMO = getOperand(i); + if (UseMO.isReg() && UseMO.isUse() && UseMO.TiedTo == OpIdx + 1) + return i; + } + llvm_unreachable("Can't find tied use"); + } + + // Now deal with inline asm by parsing the operand group descriptor flags. + // Find the beginning of each operand group. + SmallVector<unsigned, 8> GroupIdx; + unsigned OpIdxGroup = ~0u; + unsigned NumOps; + for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands(); i < e; + i += NumOps) { + const MachineOperand &FlagMO = getOperand(i); + assert(FlagMO.isImm() && "Invalid tied operand on inline asm"); + unsigned CurGroup = GroupIdx.size(); + GroupIdx.push_back(i); + NumOps = 1 + InlineAsm::getNumOperandRegisters(FlagMO.getImm()); + // OpIdx belongs to this operand group. + if (OpIdx > i && OpIdx < i + NumOps) + OpIdxGroup = CurGroup; + unsigned TiedGroup; + if (!InlineAsm::isUseOperandTiedToDef(FlagMO.getImm(), TiedGroup)) + continue; + // Operands in this group are tied to operands in TiedGroup which must be + // earlier. Find the number of operands between the two groups. + unsigned Delta = i - GroupIdx[TiedGroup]; + + // OpIdx is a use tied to TiedGroup. + if (OpIdxGroup == CurGroup) + return OpIdx - Delta; + + // OpIdx is a def tied to this use group. + if (OpIdxGroup == TiedGroup) + return OpIdx + Delta; + } + llvm_unreachable("Invalid tied operand on inline asm"); +} + +/// clearKillInfo - Clears kill flags on all operands. 
+/// +void MachineInstr::clearKillInfo() { + for (MachineOperand &MO : operands()) { + if (MO.isReg() && MO.isUse()) + MO.setIsKill(false); + } +} + +void MachineInstr::substituteRegister(unsigned FromReg, + unsigned ToReg, + unsigned SubIdx, + const TargetRegisterInfo &RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (SubIdx) + ToReg = RegInfo.getSubReg(ToReg, SubIdx); + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substPhysReg(ToReg, RegInfo); + } + } else { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substVirtReg(ToReg, SubIdx, RegInfo); + } + } +} + +/// isSafeToMove - Return true if it is safe to move this instruction. If +/// SawStore is set to true, it means that there is a store (or call) between +/// the instruction's location and its intended destination. +bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const { + // Ignore stuff that we obviously can't move. + // + // Treat volatile loads as stores. This is not strictly necessary for + // volatiles, but it is required for atomic loads. It is not allowed to move + // a load across an atomic load with Ordering > Monotonic. + if (mayStore() || isCall() || + (mayLoad() && hasOrderedMemoryRef())) { + SawStore = true; + return false; + } + + if (isPosition() || isDebugValue() || isTerminator() || + hasUnmodeledSideEffects()) + return false; + + // See if this instruction does a load. If so, we have to guarantee that the + // loaded value doesn't change between the load and the its intended + // destination. The check for isInvariantLoad gives the targe the chance to + // classify the load as always returning a constant, e.g. a constant pool + // load. + if (mayLoad() && !isInvariantLoad(AA)) + // Otherwise, this is a real load. If there is a store between the load and + // end of block, we can't move it. + return !SawStore; + + return true; +} + +/// hasOrderedMemoryRef - Return true if this instruction may have an ordered +/// or volatile memory reference, or if the information describing the memory +/// reference is not available. Return false if it is known to have no ordered +/// memory references. +bool MachineInstr::hasOrderedMemoryRef() const { + // An instruction known never to access memory won't have a volatile access. + if (!mayStore() && + !mayLoad() && + !isCall() && + !hasUnmodeledSideEffects()) + return false; + + // Otherwise, if the instruction has no memory reference information, + // conservatively assume it wasn't preserved. + if (memoperands_empty()) + return true; + + // Check the memory reference information for ordered references. + for (mmo_iterator I = memoperands_begin(), E = memoperands_end(); I != E; ++I) + if (!(*I)->isUnordered()) + return true; + + return false; +} + +/// isInvariantLoad - Return true if this instruction is loading from a +/// location whose value is invariant across the function. For example, +/// loading a value from the constant pool or from the argument area +/// of a function if it does not change. This should only return true of +/// *all* loads the instruction does are invariant (if it does multiple loads). +bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { + // If the instruction doesn't load at all, it isn't an invariant load. + if (!mayLoad()) + return false; + + // If the instruction has lost its memoperands, conservatively assume that + // it may not be an invariant load. 
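+  // (Memory operands are optional and may be dropped by transformations, so
+  // their absence means "unknown", not "no memory access".)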
+ if (memoperands_empty()) + return false; + + const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo(); + + for (mmo_iterator I = memoperands_begin(), + E = memoperands_end(); I != E; ++I) { + if ((*I)->isVolatile()) return false; + if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; + + + // A load from a constant PseudoSourceValue is invariant. + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) + if (PSV->isConstant(MFI)) + continue; + + if (const Value *V = (*I)->getValue()) { + // If we have an AliasAnalysis, ask it whether the memory is constant. + if (AA && + AA->pointsToConstantMemory( + MemoryLocation(V, (*I)->getSize(), (*I)->getAAInfo()))) + continue; + } + + // Otherwise assume conservatively. + return false; + } + + // Everything checks out. + return true; +} + +/// isConstantValuePHI - If the specified instruction is a PHI that always +/// merges together the same virtual register, return the register, otherwise +/// return 0. +unsigned MachineInstr::isConstantValuePHI() const { + if (!isPHI()) + return 0; + assert(getNumOperands() >= 3 && + "It's illegal to have a PHI without source operands"); + + unsigned Reg = getOperand(1).getReg(); + for (unsigned i = 3, e = getNumOperands(); i < e; i += 2) + if (getOperand(i).getReg() != Reg) + return 0; + return Reg; +} + +bool MachineInstr::hasUnmodeledSideEffects() const { + if (hasProperty(MCID::UnmodeledSideEffects)) + return true; + if (isInlineAsm()) { + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + return true; + } + + return false; +} + +bool MachineInstr::isLoadFoldBarrier() const { + return mayStore() || isCall() || hasUnmodeledSideEffects(); +} + +/// allDefsAreDead - Return true if all the defs of this instruction are dead. +/// +bool MachineInstr::allDefsAreDead() const { + for (const MachineOperand &MO : operands()) { + if (!MO.isReg() || MO.isUse()) + continue; + if (!MO.isDead()) + return false; + } + return true; +} + +/// copyImplicitOps - Copy implicit register operands from specified +/// instruction to this instruction. +void MachineInstr::copyImplicitOps(MachineFunction &MF, + const MachineInstr *MI) { + for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); + i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask()) + addOperand(MF, MO); + } +} + +void MachineInstr::dump() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + dbgs() << " " << *this; +#endif +} + +void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const { + const Module *M = nullptr; + if (const MachineBasicBlock *MBB = getParent()) + if (const MachineFunction *MF = MBB->getParent()) + M = MF->getFunction()->getParent(); + + ModuleSlotTracker MST(M); + print(OS, MST, SkipOpers); +} + +void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, + bool SkipOpers) const { + // We can be a bit tidier if we know the MachineFunction. + const MachineFunction *MF = nullptr; + const TargetRegisterInfo *TRI = nullptr; + const MachineRegisterInfo *MRI = nullptr; + const TargetInstrInfo *TII = nullptr; + if (const MachineBasicBlock *MBB = getParent()) { + MF = MBB->getParent(); + if (MF) { + MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + } + } + + // Save a list of virtual registers. 
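+  // Virtual registers referenced by the operands are collected here so that
+  // their register classes can be printed after the operand list (see the
+  // "Print the regclass of any virtual registers encountered" block below).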
+ SmallVector<unsigned, 8> VirtRegs; + + // Print explicitly defined operands on the left of an assignment syntax. + unsigned StartOp = 0, e = getNumOperands(); + for (; StartOp < e && getOperand(StartOp).isReg() && + getOperand(StartOp).isDef() && + !getOperand(StartOp).isImplicit(); + ++StartOp) { + if (StartOp != 0) OS << ", "; + getOperand(StartOp).print(OS, MST, TRI); + unsigned Reg = getOperand(StartOp).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + VirtRegs.push_back(Reg); + } + + if (StartOp != 0) + OS << " = "; + + // Print the opcode name. + if (TII) + OS << TII->getName(getOpcode()); + else + OS << "UNKNOWN"; + + if (SkipOpers) + return; + + // Print the rest of the operands. + bool OmittedAnyCallClobbers = false; + bool FirstOp = true; + unsigned AsmDescOp = ~0u; + unsigned AsmOpCount = 0; + + if (isInlineAsm() && e >= InlineAsm::MIOp_FirstOperand) { + // Print asm string. + OS << " "; + getOperand(InlineAsm::MIOp_AsmString).print(OS, MST, TRI); + + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack + unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_MayLoad) + OS << " [mayload]"; + if (ExtraInfo & InlineAsm::Extra_MayStore) + OS << " [maystore]"; + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + OS << " [alignstack]"; + if (getInlineAsmDialect() == InlineAsm::AD_ATT) + OS << " [attdialect]"; + if (getInlineAsmDialect() == InlineAsm::AD_Intel) + OS << " [inteldialect]"; + + StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand; + FirstOp = false; + } + + for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) { + const MachineOperand &MO = getOperand(i); + + if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) + VirtRegs.push_back(MO.getReg()); + + // Omit call-clobbered registers which aren't used anywhere. This makes + // call instructions much less noisy on targets where calls clobber lots + // of registers. Don't rely on MO.isDead() because we may be called before + // LiveVariables is run, or we may be looking at a non-allocatable reg. + if (MRI && isCall() && + MO.isReg() && MO.isImplicit() && MO.isDef()) { + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MRI->use_empty(Reg)) { + bool HasAliasLive = false; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + unsigned AliasReg = *AI; + if (!MRI->use_empty(AliasReg)) { + HasAliasLive = true; + break; + } + } + if (!HasAliasLive) { + OmittedAnyCallClobbers = true; + continue; + } + } + } + } + + if (FirstOp) FirstOp = false; else OS << ","; + OS << " "; + if (i < getDesc().NumOperands) { + const MCOperandInfo &MCOI = getDesc().OpInfo[i]; + if (MCOI.isPredicate()) + OS << "pred:"; + if (MCOI.isOptionalDef()) + OS << "opt:"; + } + if (isDebugValue() && MO.isMetadata()) { + // Pretty print DBG_VALUE instructions. + auto *DIV = dyn_cast<DILocalVariable>(MO.getMetadata()); + if (DIV && !DIV->getName().empty()) + OS << "!\"" << DIV->getName() << '\"'; + else + MO.print(OS, MST, TRI); + } else if (TRI && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TRI->getSubRegIndexName(MO.getImm()); + } else if (i == AsmDescOp && MO.isImm()) { + // Pretty print the inline asm operand descriptor. 
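+      // The printed form is, e.g., "$1:[regdef:GR32 tiedto:$0]" (GR32 is
+      // used here only as an illustrative register class name).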
+ OS << '$' << AsmOpCount++; + unsigned Flag = MO.getImm(); + switch (InlineAsm::getKind(Flag)) { + case InlineAsm::Kind_RegUse: OS << ":[reguse"; break; + case InlineAsm::Kind_RegDef: OS << ":[regdef"; break; + case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break; + case InlineAsm::Kind_Clobber: OS << ":[clobber"; break; + case InlineAsm::Kind_Imm: OS << ":[imm"; break; + case InlineAsm::Kind_Mem: OS << ":[mem"; break; + default: OS << ":[??" << InlineAsm::getKind(Flag); break; + } + + unsigned RCID = 0; + if (InlineAsm::hasRegClassConstraint(Flag, RCID)) { + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); + } else + OS << ":RC" << RCID; + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " tiedto:$" << TiedTo; + + OS << ']'; + + // Compute the index of the next operand descriptor. + AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag); + } else + MO.print(OS, MST, TRI); + } + + // Briefly indicate whether any call clobbers were omitted. + if (OmittedAnyCallClobbers) { + if (!FirstOp) OS << ","; + OS << " ..."; + } + + bool HaveSemi = false; + const unsigned PrintableFlags = FrameSetup | FrameDestroy; + if (Flags & PrintableFlags) { + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + + if (Flags & FrameDestroy) + OS << "FrameDestroy"; + } + + if (!memoperands_empty()) { + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } + + OS << " mem:"; + for (mmo_iterator i = memoperands_begin(), e = memoperands_end(); + i != e; ++i) { + (*i)->print(OS, MST); + if (std::next(i) != e) + OS << " "; + } + } + + // Print the regclass of any virtual registers encountered. + if (MRI && !VirtRegs.empty()) { + if (!HaveSemi) { + OS << ";"; + HaveSemi = true; + } + for (unsigned i = 0; i != VirtRegs.size(); ++i) { + const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]); + OS << " " << TRI->getRegClassName(RC) + << ':' << PrintReg(VirtRegs[i]); + for (unsigned j = i+1; j != VirtRegs.size();) { + if (MRI->getRegClass(VirtRegs[j]) != RC) { + ++j; + continue; + } + if (VirtRegs[i] != VirtRegs[j]) + OS << "," << PrintReg(VirtRegs[j]); + VirtRegs.erase(VirtRegs.begin()+j); + } + } + } + + // Print debug location information. + if (isDebugValue() && getOperand(e - 2).isMetadata()) { + if (!HaveSemi) + OS << ";"; + auto *DV = cast<DILocalVariable>(getOperand(e - 2).getMetadata()); + OS << " line no:" << DV->getLine(); + if (auto *InlinedAt = debugLoc->getInlinedAt()) { + DebugLoc InlinedAtDL(InlinedAt); + if (InlinedAtDL && MF) { + OS << " inlined @[ "; + InlinedAtDL.print(OS); + OS << " ]"; + } + } + if (isIndirectDebugValue()) + OS << " indirect"; + } else if (debugLoc && MF) { + if (!HaveSemi) + OS << ";"; + OS << " dbg:"; + debugLoc.print(OS); + } + + OS << '\n'; +} + +bool MachineInstr::addRegisterKilled(unsigned IncomingReg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (Reg == IncomingReg) { + if (!Found) { + if (MO.isKill()) + // The register is already marked kill. 
+ return true; + if (isPhysReg && isRegTiedToDefOperand(i)) + // Two-address uses of physregs must not be marked kill. + return true; + MO.setIsKill(); + Found = true; + } + } else if (hasAliases && MO.isKill() && + TargetRegisterInfo::isPhysicalRegister(Reg)) { + // A super-register kill already exists. + if (RegInfo->isSuperRegister(IncomingReg, Reg)) + return true; + if (RegInfo->isSubRegister(IncomingReg, Reg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded kill operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsKill(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is killed. Add a + // new implicit operand if required. + if (!Found && AddIfNotFound) { + addOperand(MachineOperand::CreateReg(IncomingReg, + false /*IsDef*/, + true /*IsImp*/, + true /*IsKill*/)); + return true; + } + return Found; +} + +void MachineInstr::clearRegisterKills(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + RegInfo = nullptr; + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isUse() || !MO.isKill()) + continue; + unsigned OpReg = MO.getReg(); + if (OpReg == Reg || (RegInfo && RegInfo->isSuperRegister(Reg, OpReg))) + MO.setIsKill(false); + } +} + +bool MachineInstr::addRegisterDead(unsigned Reg, + const TargetRegisterInfo *RegInfo, + bool AddIfNotFound) { + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); + bool hasAliases = isPhysReg && + MCRegAliasIterator(Reg, RegInfo, false).isValid(); + bool Found = false; + SmallVector<unsigned,4> DeadOps; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + + if (MOReg == Reg) { + MO.setIsDead(); + Found = true; + } else if (hasAliases && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(MOReg)) { + // There exists a super-register that's marked dead. + if (RegInfo->isSuperRegister(Reg, MOReg)) + return true; + if (RegInfo->isSubRegister(Reg, MOReg)) + DeadOps.push_back(i); + } + } + + // Trim unneeded dead operands. + while (!DeadOps.empty()) { + unsigned OpIdx = DeadOps.back(); + if (getOperand(OpIdx).isImplicit()) + RemoveOperand(OpIdx); + else + getOperand(OpIdx).setIsDead(false); + DeadOps.pop_back(); + } + + // If not found, this means an alias of one of the operands is dead. Add a + // new implicit operand if required. 
+ if (Found || !AddIfNotFound) + return Found; + + addOperand(MachineOperand::CreateReg(Reg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/, + true /*IsDead*/)); + return true; +} + +void MachineInstr::clearRegisterDeads(unsigned Reg) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg) + continue; + MO.setIsDead(false); + } +} + +void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) { + for (MachineOperand &MO : operands()) { + if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0) + continue; + MO.setIsUndef(IsUndef); + } +} + +void MachineInstr::addRegisterDefined(unsigned Reg, + const TargetRegisterInfo *RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); + if (MO) + return; + } else { + for (const MachineOperand &MO : operands()) { + if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && + MO.getSubReg() == 0) + return; + } + } + addOperand(MachineOperand::CreateReg(Reg, + true /*IsDef*/, + true /*IsImp*/)); +} + +void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs, + const TargetRegisterInfo &TRI) { + bool HasRegMask = false; + for (MachineOperand &MO : operands()) { + if (MO.isRegMask()) { + HasRegMask = true; + continue; + } + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // If there are no uses, including partial uses, the def is dead. + if (std::none_of(UsedRegs.begin(), UsedRegs.end(), + [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); })) + MO.setIsDead(); + } + + // This is a call with a register mask operand. + // Mask clobbers are always dead, so add defs for the non-dead defines. + if (HasRegMask) + for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end(); + I != E; ++I) + addRegisterDefined(*I, &TRI); +} + +unsigned +MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { + // Build up a buffer of hash code components. + SmallVector<size_t, 8> HashComponents; + HashComponents.reserve(MI->getNumOperands() + 1); + HashComponents.push_back(MI->getOpcode()); + for (const MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isDef() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; // Skip virtual register defs. + + HashComponents.push_back(hash_value(MO)); + } + return hash_combine_range(HashComponents.begin(), HashComponents.end()); +} + +void MachineInstr::emitError(StringRef Msg) const { + // Find the source location cookie. 
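+  // The cookie originates from !srcloc metadata attached to inline asm by
+  // the front end; it is forwarded with the message so the client can map
+  // the error back to the original source line.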
+ unsigned LocCookie = 0; + const MDNode *LocMD = nullptr; + for (unsigned i = getNumOperands(); i != 0; --i) { + if (getOperand(i-1).isMetadata() && + (LocMD = getOperand(i-1).getMetadata()) && + LocMD->getNumOperands() != 0) { + if (const ConstantInt *CI = + mdconst::dyn_extract<ConstantInt>(LocMD->getOperand(0))) { + LocCookie = CI->getZExtValue(); + break; + } + } + } + + if (const MachineBasicBlock *MBB = getParent()) + if (const MachineFunction *MF = MBB->getParent()) + return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg); + report_fatal_error(Msg); +} diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp new file mode 100644 index 0000000..4619daf --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp @@ -0,0 +1,339 @@ +//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +namespace { + class UnpackMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr) + : MachineFunctionPass(ID), PredicateFtor(Ftor) { + initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + std::function<bool(const Function &)> PredicateFtor; + }; +} // end anonymous namespace + +char UnpackMachineBundles::ID = 0; +char &llvm::UnpackMachineBundlesID = UnpackMachineBundles::ID; +INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles", + "Unpack machine instruction bundles", false, false) + +bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { + if (PredicateFtor && !PredicateFtor(*MF.getFunction())) + return false; + + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + + for (MachineBasicBlock::instr_iterator MII = MBB->instr_begin(), + MIE = MBB->instr_end(); MII != MIE; ) { + MachineInstr *MI = &*MII; + + // Remove BUNDLE instruction and the InsideBundle flags from bundled + // instructions. 
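+      // The BUNDLE header typically carries only implicit operands that
+      // summarize the externally visible defs and uses of its members (see
+      // finalizeBundle below), so it can simply be erased once the members
+      // have been unbundled.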
+ if (MI->isBundle()) { + while (++MII != MIE && MII->isBundledWithPred()) { + MII->unbundleFromPred(); + for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MII->getOperand(i); + if (MO.isReg() && MO.isInternalRead()) + MO.setIsInternalRead(false); + } + } + MI->eraseFromParent(); + + Changed = true; + continue; + } + + ++MII; + } + } + + return Changed; +} + +FunctionPass * +llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) { + return new UnpackMachineBundles(Ftor); +} + +namespace { + class FinalizeMachineBundles : public MachineFunctionPass { + public: + static char ID; // Pass identification + FinalizeMachineBundles() : MachineFunctionPass(ID) { + initializeFinalizeMachineBundlesPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + }; +} // end anonymous namespace + +char FinalizeMachineBundles::ID = 0; +char &llvm::FinalizeMachineBundlesID = FinalizeMachineBundles::ID; +INITIALIZE_PASS(FinalizeMachineBundles, "finalize-mi-bundles", + "Finalize machine instruction bundles", false, false) + +bool FinalizeMachineBundles::runOnMachineFunction(MachineFunction &MF) { + return llvm::finalizeBundles(MF); +} + + +/// finalizeBundle - Finalize a machine instruction bundle which includes +/// a sequence of instructions starting from FirstMI to LastMI (exclusive). +/// This routine adds a BUNDLE instruction to represent the bundle, it adds +/// IsInternalRead markers to MachineOperands which are defined inside the +/// bundle, and it copies externally visible defs and uses to the BUNDLE +/// instruction. +void llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI, + MachineBasicBlock::instr_iterator LastMI) { + assert(FirstMI != LastMI && "Empty bundle?"); + MIBundleBuilder Bundle(MBB, FirstMI, LastMI); + + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + MachineInstrBuilder MIB = + BuildMI(MF, FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); + Bundle.prepend(MIB); + + SmallVector<unsigned, 32> LocalDefs; + SmallSet<unsigned, 32> LocalDefSet; + SmallSet<unsigned, 8> DeadDefSet; + SmallSet<unsigned, 16> KilledDefSet; + SmallVector<unsigned, 8> ExternUses; + SmallSet<unsigned, 8> ExternUseSet; + SmallSet<unsigned, 8> KilledUseSet; + SmallSet<unsigned, 8> UndefUseSet; + SmallVector<MachineOperand*, 4> Defs; + for (; FirstMI != LastMI; ++FirstMI) { + for (unsigned i = 0, e = FirstMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = FirstMI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDef()) { + Defs.push_back(&MO); + continue; + } + + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (LocalDefSet.count(Reg)) { + MO.setIsInternalRead(); + if (MO.isKill()) + // Internal def is now killed. + KilledDefSet.insert(Reg); + } else { + if (ExternUseSet.insert(Reg).second) { + ExternUses.push_back(Reg); + if (MO.isUndef()) + UndefUseSet.insert(Reg); + } + if (MO.isKill()) + // External def is now killed. 
+ KilledUseSet.insert(Reg); + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + MachineOperand &MO = *Defs[i]; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + + if (LocalDefSet.insert(Reg).second) { + LocalDefs.push_back(Reg); + if (MO.isDead()) { + DeadDefSet.insert(Reg); + } + } else { + // Re-defined inside the bundle, it's no longer killed. + KilledDefSet.erase(Reg); + if (!MO.isDead()) + // Previously defined but dead. + DeadDefSet.erase(Reg); + } + + if (!MO.isDead()) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + unsigned SubReg = *SubRegs; + if (LocalDefSet.insert(SubReg).second) + LocalDefs.push_back(SubReg); + } + } + } + + Defs.clear(); + } + + SmallSet<unsigned, 32> Added; + for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) { + unsigned Reg = LocalDefs[i]; + if (Added.insert(Reg).second) { + // If it's not live beyond end of the bundle, mark it dead. + bool isDead = DeadDefSet.count(Reg) || KilledDefSet.count(Reg); + MIB.addReg(Reg, getDefRegState(true) | getDeadRegState(isDead) | + getImplRegState(true)); + } + } + + for (unsigned i = 0, e = ExternUses.size(); i != e; ++i) { + unsigned Reg = ExternUses[i]; + bool isKill = KilledUseSet.count(Reg); + bool isUndef = UndefUseSet.count(Reg); + MIB.addReg(Reg, getKillRegState(isKill) | getUndefRegState(isUndef) | + getImplRegState(true)); + } +} + +/// finalizeBundle - Same functionality as the previous finalizeBundle except +/// the last instruction in the bundle is not provided as an input. This is +/// used in cases where bundles are pre-determined by marking instructions +/// with 'InsideBundle' marker. It returns the MBB instruction iterator that +/// points to the end of the bundle. +MachineBasicBlock::instr_iterator +llvm::finalizeBundle(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator FirstMI) { + MachineBasicBlock::instr_iterator E = MBB.instr_end(); + MachineBasicBlock::instr_iterator LastMI = std::next(FirstMI); + while (LastMI != E && LastMI->isInsideBundle()) + ++LastMI; + finalizeBundle(MBB, FirstMI, LastMI); + return LastMI; +} + +/// finalizeBundles - Finalize instruction bundles in the specified +/// MachineFunction. Return true if any bundles are finalized. +bool llvm::finalizeBundles(MachineFunction &MF) { + bool Changed = false; + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock &MBB = *I; + MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); + MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); + if (MII == MIE) + continue; + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + + for (++MII; MII != MIE; ) { + if (!MII->isInsideBundle()) + ++MII; + else { + MII = finalizeBundle(MBB, std::prev(MII)); + Changed = true; + } + } + } + + return Changed; +} + +//===----------------------------------------------------------------------===// +// MachineOperand iterator +//===----------------------------------------------------------------------===// + +MachineOperandIteratorBase::VirtRegInfo +MachineOperandIteratorBase::analyzeVirtReg(unsigned Reg, + SmallVectorImpl<std::pair<MachineInstr*, unsigned> > *Ops) { + VirtRegInfo RI = { false, false, false }; + for(; isValid(); ++*this) { + MachineOperand &MO = deref(); + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + // Remember each (MI, OpNo) that refers to Reg. + if (Ops) + Ops->push_back(std::make_pair(MO.getParent(), getOperandNo())); + + // Both defs and uses can read virtual registers. 
+ if (MO.readsReg()) { + RI.Reads = true; + if (MO.isDef()) + RI.Tied = true; + } + + // Only defs can write. + if (MO.isDef()) + RI.Writes = true; + else if (!RI.Tied && MO.getParent()->isRegTiedToDefOperand(getOperandNo())) + RI.Tied = true; + } + return RI; +} + +MachineOperandIteratorBase::PhysRegInfo +MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, + const TargetRegisterInfo *TRI) { + bool AllDefsDead = true; + PhysRegInfo PRI = {false, false, false, false, false, false, false}; + + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "analyzePhysReg not given a physical register!"); + for (; isValid(); ++*this) { + MachineOperand &MO = deref(); + + if (MO.isRegMask() && MO.clobbersPhysReg(Reg)) { + PRI.Clobbered = true; + continue; + } + + if (!MO.isReg()) + continue; + + unsigned MOReg = MO.getReg(); + if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg)) + continue; + + if (!TRI->regsOverlap(MOReg, Reg)) + continue; + + bool Covered = TRI->isSuperRegisterEq(Reg, MOReg); + if (MO.readsReg()) { + PRI.Read = true; + if (Covered) { + PRI.FullyRead = true; + if (MO.isKill()) + PRI.Killed = true; + } + } else if (MO.isDef()) { + PRI.Defined = true; + if (Covered) + PRI.FullyDefined = true; + if (!MO.isDead()) + AllDefsDead = false; + } + } + + if (AllDefsDead && PRI.FullyDefined) + PRI.DeadDef = true; + + return PRI; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp new file mode 100644 index 0000000..a8368e9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -0,0 +1,1418 @@ +//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs loop invariant code motion on machine instructions. We +// attempt to remove as much code from the body of a loop as possible. +// +// This pass is not intended to be a replacement or a complete alternative +// for the LLVM-IR-level LICM pass. It is only designed to hoist simple +// constructs that are not exposed before lowering and instruction selection. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "machine-licm" + +static cl::opt<bool> +AvoidSpeculation("avoid-speculation", + cl::desc("MachineLICM should avoid speculation"), + cl::init(true), cl::Hidden); + +static cl::opt<bool> +HoistCheapInsts("hoist-cheap-insts", + cl::desc("MachineLICM should hoist even cheap instructions"), + cl::init(false), cl::Hidden); + +static cl::opt<bool> +SinkInstsToAvoidSpills("sink-insts-to-avoid-spills", + cl::desc("MachineLICM should sink instructions into " + "loops to avoid register spills"), + cl::init(false), cl::Hidden); + +STATISTIC(NumHoisted, + "Number of machine instructions hoisted out of loops"); +STATISTIC(NumLowRP, + "Number of instructions hoisted in low reg pressure situation"); +STATISTIC(NumHighLatency, + "Number of high latency instructions hoisted"); +STATISTIC(NumCSEed, + "Number of hoisted machine instructions CSEed"); +STATISTIC(NumPostRAHoisted, + "Number of machine instructions hoisted out of loops post regalloc"); + +namespace { + class MachineLICM : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetLoweringBase *TLI; + const TargetRegisterInfo *TRI; + const MachineFrameInfo *MFI; + MachineRegisterInfo *MRI; + TargetSchedModel SchedModel; + bool PreRegAlloc; + + // Various analyses that we use... + AliasAnalysis *AA; // Alias analysis info. + MachineLoopInfo *MLI; // Current MachineLoopInfo + MachineDominatorTree *DT; // Machine dominator tree for the cur loop + + // State that is updated as we process loops + bool Changed; // True if a loop is changed. + bool FirstInLoop; // True if it's the first LICM in the loop. + MachineLoop *CurLoop; // The current loop we are working on. + MachineBasicBlock *CurPreheader; // The preheader for CurLoop. + + // Exit blocks for CurLoop. + SmallVector<MachineBasicBlock*, 8> ExitBlocks; + + bool isExitBlock(const MachineBasicBlock *MBB) const { + return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) != + ExitBlocks.end(); + } + + // Track 'estimated' register pressure. + SmallSet<unsigned, 32> RegSeen; + SmallVector<unsigned, 8> RegPressure; + + // Register pressure "limit" per register pressure set. If the pressure + // is higher than the limit, then it's considered high. + SmallVector<unsigned, 8> RegLimit; + + // Register pressure on path leading from loop preheader to current BB. + SmallVector<SmallVector<unsigned, 8>, 16> BackTrace; + + // For each opcode, keep a list of potential CSE instructions. 
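+    // Keyed by opcode; EliminateCSE() scans the per-opcode list for an
+    // identical instruction that has already been hoisted to the preheader
+    // and reuses its result registers instead of hoisting a duplicate.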
+ DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap; + + enum { + SpeculateFalse = 0, + SpeculateTrue = 1, + SpeculateUnknown = 2 + }; + + // If a MBB does not dominate loop exiting blocks then it may not safe + // to hoist loads from this block. + // Tri-state: 0 - false, 1 - true, 2 - unknown + unsigned SpeculationState; + + public: + static char ID; // Pass identification, replacement for typeid + MachineLICM() : + MachineFunctionPass(ID), PreRegAlloc(true) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } + + explicit MachineLICM(bool PreRA) : + MachineFunctionPass(ID), PreRegAlloc(PreRA) { + initializeMachineLICMPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + void releaseMemory() override { + RegSeen.clear(); + RegPressure.clear(); + RegLimit.clear(); + BackTrace.clear(); + CSEMap.clear(); + } + + private: + /// Keep track of information about hoisting candidates. + struct CandidateInfo { + MachineInstr *MI; + unsigned Def; + int FI; + CandidateInfo(MachineInstr *mi, unsigned def, int fi) + : MI(mi), Def(def), FI(fi) {} + }; + + void HoistRegionPostRA(); + + void HoistPostRA(MachineInstr *MI, unsigned Def); + + void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs, + SmallVectorImpl<CandidateInfo> &Candidates); + + void AddToLiveIns(unsigned Reg); + + bool IsLICMCandidate(MachineInstr &I); + + bool IsLoopInvariantInst(MachineInstr &I); + + bool HasLoopPHIUse(const MachineInstr *MI) const; + + bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, + unsigned Reg) const; + + bool IsCheapInstruction(MachineInstr &MI) const; + + bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost, + bool Cheap); + + void UpdateBackTraceRegPressure(const MachineInstr *MI); + + bool IsProfitableToHoist(MachineInstr &MI); + + bool IsGuaranteedToExecute(MachineBasicBlock *BB); + + void EnterScope(MachineBasicBlock *MBB); + + void ExitScope(MachineBasicBlock *MBB); + + void ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap); + + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); + + void SinkIntoLoop(); + + void InitRegPressure(MachineBasicBlock *BB); + + DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI, + bool ConsiderSeen, + bool ConsiderUnseenAsDef); + + void UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef = false); + + MachineInstr *ExtractHoistableLoad(MachineInstr *MI); + + const MachineInstr * + LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr *> &PrevMIs); + + bool EliminateCSE( + MachineInstr *MI, + DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI); + + bool MayCSE(MachineInstr *MI); + + bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); + + void InitCSEMap(MachineBasicBlock *BB); + + MachineBasicBlock *getCurPreheader(); + }; +} // end anonymous namespace + +char MachineLICM::ID = 0; +char &llvm::MachineLICMID = MachineLICM::ID; 
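+
+// Usage sketch (illustrative; the MyTargetPassConfig hook shown here is an
+// assumption, not part of this file): the pass is scheduled by its ID, e.g.
+// from a TargetPassConfig hook:
+//
+//   void MyTargetPassConfig::addPreRegAlloc() {
+//     addPass(&MachineLICMID);   // machine LICM on SSA-form machine code
+//   }
+//
+// The pre- vs. post-regalloc behavior is selected automatically:
+// runOnMachineFunction() sets PreRegAlloc from MRI->isSSA(), so the same
+// pass works on both sides of register allocation.
+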
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MachineLICM, "machinelicm", + "Machine Loop Invariant Code Motion", false, false) + +/// Test if the given loop is the outer-most loop that has a unique predecessor. +static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { + // Check whether this loop even has a unique predecessor. + if (!CurLoop->getLoopPredecessor()) + return false; + // Ok, now check to see if any of its outer loops do. + for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) + if (L->getLoopPredecessor()) + return false; + // None of them did, so this is the outermost with a unique predecessor. + return true; +} + +bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + Changed = FirstInLoop = false; + const TargetSubtargetInfo &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TLI = ST.getTargetLowering(); + TRI = ST.getRegisterInfo(); + MFI = MF.getFrameInfo(); + MRI = &MF.getRegInfo(); + SchedModel.init(ST.getSchedModel(), &ST, TII); + + PreRegAlloc = MRI->isSSA(); + + if (PreRegAlloc) + DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: "); + else + DEBUG(dbgs() << "******** Post-regalloc Machine LICM: "); + DEBUG(dbgs() << MF.getName() << " ********\n"); + + if (PreRegAlloc) { + // Estimate register pressure during pre-regalloc pass. + unsigned NumRPS = TRI->getNumRegPressureSets(); + RegPressure.resize(NumRPS); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + RegLimit.resize(NumRPS); + for (unsigned i = 0, e = NumRPS; i != e; ++i) + RegLimit[i] = TRI->getRegPressureSetLimit(MF, i); + } + + // Get our Loop information... + MLI = &getAnalysis<MachineLoopInfo>(); + DT = &getAnalysis<MachineDominatorTree>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end()); + while (!Worklist.empty()) { + CurLoop = Worklist.pop_back_val(); + CurPreheader = nullptr; + ExitBlocks.clear(); + + // If this is done before regalloc, only visit outer-most preheader-sporting + // loops. + if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { + Worklist.append(CurLoop->begin(), CurLoop->end()); + continue; + } + + CurLoop->getExitBlocks(ExitBlocks); + + if (!PreRegAlloc) + HoistRegionPostRA(); + else { + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + FirstInLoop = true; + HoistOutOfLoop(N); + CSEMap.clear(); + + if (SinkInstsToAvoidSpills) + SinkIntoLoop(); + } + } + + return Changed; +} + +/// Return true if instruction stores to the specified frame. +static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { + // If we lost memory operands, conservatively assume that the instruction + // writes to all slots. 
+  if (MI->memoperands_empty())
+    return true;
+  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+         oe = MI->memoperands_end(); o != oe; ++o) {
+    if (!(*o)->isStore() || !(*o)->getPseudoValue())
+      continue;
+    if (const FixedStackPseudoSourceValue *Value =
+        dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
+      if (Value->getFrameIndex() == FI)
+        return true;
+    }
+  }
+  return false;
+}
+
+/// Examine the instruction for a potential LICM candidate. Also
+/// gather register def and frame object update information.
+void MachineLICM::ProcessMI(MachineInstr *MI,
+                            BitVector &PhysRegDefs,
+                            BitVector &PhysRegClobbers,
+                            SmallSet<int, 32> &StoredFIs,
+                            SmallVectorImpl<CandidateInfo> &Candidates) {
+  bool RuledOut = false;
+  bool HasNonInvariantUse = false;
+  unsigned Def = 0;
+  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+    const MachineOperand &MO = MI->getOperand(i);
+    if (MO.isFI()) {
+      // Remember if the instruction stores to the frame index.
+      int FI = MO.getIndex();
+      if (!StoredFIs.count(FI) &&
+          MFI->isSpillSlotObjectIndex(FI) &&
+          InstructionStoresToFI(MI, FI))
+        StoredFIs.insert(FI);
+      HasNonInvariantUse = true;
+      continue;
+    }
+
+    // We can't hoist an instruction defining a physreg that is clobbered in
+    // the loop.
+    if (MO.isRegMask()) {
+      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
+      continue;
+    }
+
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg)
+      continue;
+    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+           "Not expecting virtual register!");
+
+    if (!MO.isDef()) {
+      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
+        // If it's using a non-loop-invariant register, then it's obviously not
+        // safe to hoist.
+        HasNonInvariantUse = true;
+      continue;
+    }
+
+    if (MO.isImplicit()) {
+      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+        PhysRegClobbers.set(*AI);
+      if (!MO.isDead())
+        // Non-dead implicit def? This cannot be hoisted.
+        RuledOut = true;
+      // No need to check if a dead implicit def is also defined by
+      // another instruction.
+      continue;
+    }
+
+    // FIXME: For now, avoid instructions with multiple defs, unless
+    // it's a dead implicit def.
+    if (Def)
+      RuledOut = true;
+    else
+      Def = Reg;
+
+    // If we have already seen another instruction that defines the same
+    // register, then this is not safe. A second def is indicated by setting a
+    // PhysRegClobbers bit.
+    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
+      if (PhysRegDefs.test(*AS))
+        PhysRegClobbers.set(*AS);
+      PhysRegDefs.set(*AS);
+    }
+    if (PhysRegClobbers.test(Reg))
+      // The register defined by MI is also defined by another instruction in
+      // the loop, so MI cannot be a LICM candidate.
+      RuledOut = true;
+  }
+
+  // Only consider reloads for now, and remats which do not have register
+  // operands. FIXME: Consider unfold load folding instructions.
+  if (Def && !RuledOut) {
+    int FI = INT_MIN;
+    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
+        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
+      Candidates.push_back(CandidateInfo(MI, Def, FI));
+  }
+}
+
+/// Walk the specified region of the CFG and hoist loop invariants out to the
+/// preheader.
+void MachineLICM::HoistRegionPostRA() {
+  MachineBasicBlock *Preheader = getCurPreheader();
+  if (!Preheader)
+    return;
+
+  unsigned NumRegs = TRI->getNumRegs();
+  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
+  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
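+  // ProcessMI() populates these: the first def of a register (and its
+  // aliases) only sets PhysRegDefs; a second def, an implicit def, or a
+  // regmask clobber also sets PhysRegClobbers. Candidates whose def bit is
+  // set in PhysRegClobbers, or is read by the preheader terminator, or whose
+  // uses are defined anywhere in the loop, are rejected below.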
+ + SmallVector<CandidateInfo, 32> Candidates; + SmallSet<int, 32> StoredFIs; + + // Walk the entire region, count number of defs for each register, and + // collect potential LICM candidates. + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isEHPad()) continue; + + // Conservatively treat live-in's as an external def. + // FIXME: That means a reload that're reused in successor block(s) will not + // be LICM'ed. + for (const auto &LI : BB->liveins()) { + for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI) + PhysRegDefs.set(*AI); + } + + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates); + } + } + + // Gather the registers read / clobbered by the terminator. + BitVector TermRegs(NumRegs); + MachineBasicBlock::iterator TI = Preheader->getFirstTerminator(); + if (TI != Preheader->end()) { + for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = TI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + TermRegs.set(*AI); + } + } + + // Now evaluate whether the potential candidates qualify. + // 1. Check if the candidate defined register is defined by another + // instruction in the loop. + // 2. If the candidate is a load from stack slot (always true for now), + // check if the slot is stored anywhere in the loop. + // 3. Make sure candidate def should not clobber + // registers read by the terminator. Similarly its def should not be + // clobbered by the terminator. + for (unsigned i = 0, e = Candidates.size(); i != e; ++i) { + if (Candidates[i].FI != INT_MIN && + StoredFIs.count(Candidates[i].FI)) + continue; + + unsigned Def = Candidates[i].Def; + if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) { + bool Safe = true; + MachineInstr *MI = Candidates[i].MI; + for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg() || MO.isDef() || !MO.getReg()) + continue; + unsigned Reg = MO.getReg(); + if (PhysRegDefs.test(Reg) || + PhysRegClobbers.test(Reg)) { + // If it's using a non-loop-invariant register, then it's obviously + // not safe to hoist. + Safe = false; + break; + } + } + if (Safe) + HoistPostRA(MI, Candidates[i].Def); + } + } +} + +/// Add register 'Reg' to the livein sets of BBs in the current loop, and make +/// sure it is not killed by any instructions in the loop. 
+void MachineLICM::AddToLiveIns(unsigned Reg) { + const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (!BB->isLiveIn(Reg)) + BB->addLiveIn(Reg); + for (MachineBasicBlock::iterator + MII = BB->begin(), E = BB->end(); MII != E; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; + if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) + MO.setIsKill(false); + } + } + } +} + +/// When an instruction is found to only use loop invariant operands that is +/// safe to hoist, this instruction is called to do the dirty work. +void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + MachineBasicBlock *Preheader = getCurPreheader(); + + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" + << MI->getParent()->getNumber() << ": " << *MI); + + // Splice the instruction to the preheader. + MachineBasicBlock *MBB = MI->getParent(); + Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); + + // Add register to livein list to all the BBs in the current loop since a + // loop invariant must be kept live throughout the whole loop. This is + // important to ensure later passes do not scavenge the def register. + AddToLiveIns(Def); + + ++NumPostRAHoisted; + Changed = true; +} + +/// Check if this mbb is guaranteed to execute. If not then a load from this mbb +/// may not be safe to hoist. +bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { + if (SpeculationState != SpeculateUnknown) + return SpeculationState == SpeculateFalse; + + if (BB != CurLoop->getHeader()) { + // Check loop exiting blocks. + SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks; + CurLoop->getExitingBlocks(CurrentLoopExitingBlocks); + for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i) + if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) { + SpeculationState = SpeculateTrue; + return false; + } + } + + SpeculationState = SpeculateFalse; + return true; +} + +void MachineLICM::EnterScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n'); + + // Remember livein register pressure. + BackTrace.push_back(RegPressure); +} + +void MachineLICM::ExitScope(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n'); + BackTrace.pop_back(); +} + +/// Destroy scope for the MBB that corresponds to the given dominator tree node +/// if its a leaf or all of its children are done. Walk up the dominator tree to +/// destroy ancestors which are now done. +void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, + DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren, + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) { + if (OpenChildren[Node]) + return; + + // Pop scope. + ExitScope(Node->getBlock()); + + // Now traverse upwards to pop ancestors whose offsprings are all done. 
+ while (MachineDomTreeNode *Parent = ParentMap[Node]) { + unsigned Left = --OpenChildren[Parent]; + if (Left != 0) + break; + ExitScope(Parent->getBlock()); + Node = Parent; + } +} + +/// Walk the specified loop in the CFG (defined by all blocks dominated by the +/// specified header block, and that are in the current loop) in depth first +/// order w.r.t the DominatorTree. This allows us to visit definitions before +/// uses, allowing us to hoist a loop body in one pass without iteration. +/// +void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector<MachineDomTreeNode*, 32> Scopes; + SmallVector<MachineDomTreeNode*, 8> WorkList; + DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap; + DenseMap<MachineDomTreeNode*, unsigned> OpenChildren; + + // Perform a DFS walk to determine the order of visit. + WorkList.push_back(HeaderN); + while (!WorkList.empty()) { + MachineDomTreeNode *Node = WorkList.pop_back_val(); + assert(Node && "Null dominator tree node?"); + MachineBasicBlock *BB = Node->getBlock(); + + // If the header of the loop containing this basic block is a landing pad, + // then don't try to hoist instructions out of this loop. + const MachineLoop *ML = MLI->getLoopFor(BB); + if (ML && ML->getHeader()->isEHPad()) + continue; + + // If this subregion is not in the top level loop at all, exit. + if (!CurLoop->contains(BB)) + continue; + + Scopes.push_back(Node); + const std::vector<MachineDomTreeNode*> &Children = Node->getChildren(); + unsigned NumChildren = Children.size(); + + // Don't hoist things out of a large switch statement. This often causes + // code to be hoisted that wasn't going to be executed, and increases + // register pressure in a situation where it's likely to matter. + if (BB->succ_size() >= 25) + NumChildren = 0; + + OpenChildren[Node] = NumChildren; + // Add children in reverse order as then the next popped worklist node is + // the first child of this node. This means we ultimately traverse the + // DOM tree in exactly the same order as if we'd recursed. + for (int i = (int)NumChildren-1; i >= 0; --i) { + MachineDomTreeNode *Child = Children[i]; + ParentMap[Child] = Node; + WorkList.push_back(Child); + } + } + + if (Scopes.size() == 0) + return; + + // Compute registers which are livein into the loop headers. + RegSeen.clear(); + BackTrace.clear(); + InitRegPressure(Preheader); + + // Now perform LICM. + for (unsigned i = 0, e = Scopes.size(); i != e; ++i) { + MachineDomTreeNode *Node = Scopes[i]; + MachineBasicBlock *MBB = Node->getBlock(); + + EnterScope(MBB); + + // Process the block + SpeculationState = SpeculateUnknown; + for (MachineBasicBlock::iterator + MII = MBB->begin(), E = MBB->end(); MII != E; ) { + MachineBasicBlock::iterator NextMII = MII; ++NextMII; + MachineInstr *MI = &*MII; + if (!Hoist(MI, Preheader)) + UpdateRegPressure(MI); + MII = NextMII; + } + + // If it's a leaf node, it's done. Traverse upwards to pop ancestors. + ExitScopeIfDone(Node, OpenChildren, ParentMap); + } +} + +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. 
+void MachineLICM::SinkIntoLoop() { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) + return; + + SmallVector<MachineInstr *, 8> Candidates; + for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin(); + I != Preheader->instr_end(); ++I) { + // We need to ensure that we can safely move this instruction into the loop. + // As such, it must not have side-effects, e.g. such as a call has. + if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I)) + Candidates.push_back(&*I); + } + + for (MachineInstr *I : Candidates) { + const MachineOperand &MO = I->getOperand(0); + if (!MO.isDef() || !MO.isReg() || !MO.getReg()) + continue; + if (!MRI->hasOneDef(MO.getReg())) + continue; + bool CanSink = true; + MachineBasicBlock *B = nullptr; + for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { + // FIXME: Come up with a proper cost model that estimates whether sinking + // the instruction (and thus possibly executing it on every loop + // iteration) is more expensive than a register. + // For now assumes that copies are cheap and thus almost always worth it. + if (!MI.isCopy()) { + CanSink = false; + break; + } + if (!B) { + B = MI.getParent(); + continue; + } + B = DT->findNearestCommonDominator(B, MI.getParent()); + if (!B) { + CanSink = false; + break; + } + } + if (!CanSink || !B || B == Preheader) + continue; + B->splice(B->getFirstNonPHI(), Preheader, I); + } +} + +static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { + return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); +} + +/// Find all virtual register references that are liveout of the preheader to +/// initialize the starting "register pressure". Note this does not count live +/// through (livein but not used) registers. +void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { + std::fill(RegPressure.begin(), RegPressure.end(), 0); + + // If the preheader has only a single predecessor and it ends with a + // fallthrough or an unconditional branch, then scan its predecessor for live + // defs as well. This happens whenever the preheader is created by splitting + // the critical edge from the loop predecessor to the loop header. + if (BB->pred_size() == 1) { + MachineBasicBlock *TBB = nullptr, *FBB = nullptr; + SmallVector<MachineOperand, 4> Cond; + if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty()) + InitRegPressure(*BB->pred_begin()); + } + + for (const MachineInstr &MI : *BB) + UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); +} + +/// Update estimate of register pressure after the specified instruction. +void MachineLICM::UpdateRegPressure(const MachineInstr *MI, + bool ConsiderUnseenAsDef) { + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); + for (const auto &RPIdAndCost : Cost) { + unsigned Class = RPIdAndCost.first; + if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second) + RegPressure[Class] = 0; + else + RegPressure[Class] += RPIdAndCost.second; + } +} + +/// Calculate the additional register pressure that the registers used in MI +/// cause. +/// +/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to +/// figure out which usages are live-ins. +/// FIXME: Figure out a way to consider 'RegSeen' from all code paths. 
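+///
+/// For example, a def of a virtual register in class RC adds RC's RegWeight
+/// to every pressure set containing RC; a use that kills an already-seen
+/// register subtracts that weight; and, when ConsiderUnseenAsDef is set, a
+/// use of a not-yet-seen (and not killed) register is counted like a def,
+/// since it must be live into the region.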
+DenseMap<unsigned, int> +MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, + bool ConsiderUnseenAsDef) { + DenseMap<unsigned, int> Cost; + if (MI->isImplicitDef()) + return Cost; + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // FIXME: It seems bad to use RegSeen only for some of these calculations. + bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false; + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + + RegClassWeight W = TRI->getRegClassWeight(RC); + int RCCost = 0; + if (MO.isDef()) + RCCost = W.RegWeight; + else { + bool isKill = isOperandKill(MO, MRI); + if (isNew && !isKill && ConsiderUnseenAsDef) + // Haven't seen this, it must be a livein. + RCCost = W.RegWeight; + else if (!isNew && isKill) + RCCost = -W.RegWeight; + } + if (RCCost == 0) + continue; + const int *PS = TRI->getRegClassPressureSets(RC); + for (; *PS != -1; ++PS) { + if (Cost.find(*PS) == Cost.end()) + Cost[*PS] = RCCost; + else + Cost[*PS] += RCCost; + } + } + return Cost; +} + +/// Return true if this machine instruction loads from global offset table or +/// constant pool. +static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) { + assert (MI.mayLoad() && "Expected MI that loads!"); + + // If we lost memory operands, conservatively assume that the instruction + // reads from everything.. + if (MI.memoperands_empty()) + return true; + + for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), + E = MI.memoperands_end(); I != E; ++I) { + if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) { + if (PSV->isGOT() || PSV->isConstantPool()) + return true; + } + } + return false; +} + +/// Returns true if the instruction may be a suitable candidate for LICM. +/// e.g. If the instruction is a call, then it's obviously not safe to hoist it. +bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + // Check if it's safe to move the instruction. + bool DontMoveAcrossStore = true; + if (!I.isSafeToMove(AA, DontMoveAcrossStore)) + return false; + + // If it is load then check if it is guaranteed to execute by making sure that + // it dominates all exiting blocks. If it doesn't, then there is a path out of + // the loop which does not execute this load, so we can't hoist it. Loads + // from constant memory are not safe to speculate all the time, for example + // indexed load from a jump table. + // Stores and side effects are already checked by isSafeToMove. + if (I.mayLoad() && !mayLoadFromGOTOrConstantPool(I) && + !IsGuaranteedToExecute(I.getParent())) + return false; + + return true; +} + +/// Returns true if the instruction is loop invariant. +/// I.e., all virtual register operands are defined outside of the loop, +/// physical registers aren't accessed explicitly, and there are no side +/// effects that aren't captured by the operands or other flags. +/// +bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { + if (!IsLICMCandidate(I)) + return false; + + // The instruction is loop invariant if all of its operands are. + for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = I.getOperand(i); + + if (!MO.isReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + // Don't hoist an instruction that uses or defines a physical register. 
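
calcRegisterCost above turns an instruction's operands into signed pressure contributions: a def adds the class weight, a first-seen use that is not a kill is treated as a live-in and also adds it, and a kill of an already-seen register subtracts it. The sketch below mirrors those three cases with a toy operand type and one pressure set per register; it is a simplification, not the real pressure-set mapping.

#include <cstdio>
#include <map>
#include <set>
#include <vector>

// Toy operand: register number, def flag, kill (last use) flag. Weight stands
// in for the register class weight; one pressure set per register here.
struct Operand { unsigned Reg; bool IsDef; bool IsKill; };

static std::map<unsigned, int> cost(const std::vector<Operand> &Ops,
                                    std::set<unsigned> &Seen, int Weight) {
  std::map<unsigned, int> C;
  for (const Operand &O : Ops) {
    bool IsNew = Seen.insert(O.Reg).second;
    int RC = 0;
    if (O.IsDef)
      RC = Weight;              // a def makes a new value live
    else if (IsNew && !O.IsKill)
      RC = Weight;              // unseen use that survives: treat as live-in
    else if (!IsNew && O.IsKill)
      RC = -Weight;             // last use of a value we counted earlier
    if (RC)
      C[O.Reg] += RC;
  }
  return C;
}

int main() {
  std::set<unsigned> Seen;
  auto Def = cost({{1, true, false}}, Seen, 2); // def of reg 1: +2
  auto Use = cost({{1, false, true}}, Seen, 2); // later kill of reg 1: -2
  std::printf("%d %d\n", Def[1], Use[1]);       // prints: 2 -2
}
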
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent())) + return false; + // Otherwise it's safe to move. + continue; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return false; + } else if (CurLoop->getHeader()->isLiveIn(Reg)) { + // If the reg is live into the loop, we can't hoist an instruction + // which would clobber it. + return false; + } + } + + if (!MO.isUse()) + continue; + + assert(MRI->getVRegDef(Reg) && + "Machine instr not mapped for this vreg?!"); + + // If the loop contains the definition of an operand, then the instruction + // isn't loop invariant. + if (CurLoop->contains(MRI->getVRegDef(Reg))) + return false; + } + + // If we got this far, the instruction is loop invariant! + return true; +} + + +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. +bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { + SmallVector<const MachineInstr*, 8> Work(1, MI); + do { + MI = Work.pop_back_val(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + // A PHI may cause a copy to be inserted. + if (UseMI.isPHI()) { + // A PHI inside the loop causes a copy because the live range of Reg is + // extended across the PHI. + if (CurLoop->contains(&UseMI)) + return true; + // A PHI in an exit block can cause a copy to be inserted if the PHI + // has multiple predecessors in the loop with different values. + // For now, approximate by rejecting all exit blocks. + if (isExitBlock(UseMI.getParent())) + return true; + continue; + } + // Look past copies as well. + if (UseMI.isCopy() && CurLoop->contains(&UseMI)) + Work.push_back(&UseMI); + } + } + } while (!Work.empty()); + return false; +} + +/// Compute operand latency between a def of 'Reg' and an use in the current +/// loop, return true if the target considered it high. +bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, + unsigned DefIdx, unsigned Reg) const { + if (MRI->use_nodbg_empty(Reg)) + return false; + + for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { + if (UseMI.isCopyLike()) + continue; + if (!CurLoop->contains(UseMI.getParent())) + continue; + for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { + const MachineOperand &MO = UseMI.getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned MOReg = MO.getReg(); + if (MOReg != Reg) + continue; + + if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i)) + return true; + } + + // Only look at the first in loop use. + break; + } + + return false; +} + +/// Return true if the instruction is marked "cheap" or the operand latency +/// between its def and a use is one or less. 
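
HasLoopPHIUse above is a worklist search over the def-use graph that looks through in-loop copies and reports a PHI use that would force a copy when the live range is extended. A compact generic version of the same search over a toy instruction type (made-up fields, not MachineRegisterInfo):

#include <cassert>
#include <vector>

struct Instr {
  bool IsPHI = false;
  bool IsCopy = false;
  bool InLoop = false;
  std::vector<Instr *> Users; // users of the value this instruction defines
};

// Worklist search: return true if any user, possibly reached through a chain
// of in-loop copies, is a PHI (approximating both the in-loop and the
// exit-block PHI cases of the real check).
static bool hasLoopPHIUse(const Instr *MI) {
  std::vector<const Instr *> Work(1, MI);
  do {
    const Instr *I = Work.back();
    Work.pop_back();
    for (const Instr *U : I->Users) {
      if (U->IsPHI)
        return true;        // a PHI use may force a copy after hoisting
      if (U->IsCopy && U->InLoop)
        Work.push_back(U);  // look through copies inside the loop
    }
  } while (!Work.empty());
  return false;
}

int main() {
  Instr Phi;  Phi.IsPHI = true;  Phi.InLoop = true;
  Instr Copy; Copy.IsCopy = true; Copy.InLoop = true; Copy.Users = {&Phi};
  Instr Def;  Def.Users = {&Copy};
  assert(hasLoopPHIUse(&Def)); // Def -> Copy -> PHI is found transitively
  Instr Plain;
  assert(!hasLoopPHIUse(&Plain));
}
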
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { + if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) + return true; + + bool isCheap = false; + unsigned NumDefs = MI.getDesc().getNumDefs(); + for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { + MachineOperand &DefMO = MI.getOperand(i); + if (!DefMO.isReg() || !DefMO.isDef()) + continue; + --NumDefs; + unsigned Reg = DefMO.getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + if (!TII->hasLowDefLatency(SchedModel, &MI, i)) + return false; + isCheap = true; + } + + return isCheap; +} + +/// Visit BBs from header to current BB, check if hoisting an instruction of the +/// given cost matrix can cause high register pressure. +bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost, + bool CheapInstr) { + for (const auto &RPIdAndCost : Cost) { + if (RPIdAndCost.second <= 0) + continue; + + unsigned Class = RPIdAndCost.first; + int Limit = RegLimit[Class]; + + // Don't hoist cheap instructions if they would increase register pressure, + // even if we're under the limit. + if (CheapInstr && !HoistCheapInsts) + return true; + + for (const auto &RP : BackTrace) + if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit) + return true; + } + + return false; +} + +/// Traverse the back trace from header to the current block and update their +/// register pressures to reflect the effect of hoisting MI from the current +/// block to the preheader. +void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { + // First compute the 'cost' of the instruction, i.e. its contribution + // to register pressure. + auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + + // Update register pressure of blocks from loop header to current block. + for (auto &RP : BackTrace) + for (const auto &RPIdAndCost : Cost) + RP[RPIdAndCost.first] += RPIdAndCost.second; +} + +/// Return true if it is potentially profitable to hoist the given loop +/// invariant. +bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { + if (MI.isImplicitDef()) + return true; + + // Besides removing computation from the loop, hoisting an instruction has + // these effects: + // + // - The value defined by the instruction becomes live across the entire + // loop. This increases register pressure in the loop. + // + // - If the value is used by a PHI in the loop, a copy will be required for + // lowering the PHI after extending the live range. + // + // - When hoisting the last use of a value in the loop, that value no longer + // needs to be live in the loop. This lowers register pressure in the loop. + + bool CheapInstr = IsCheapInstruction(MI); + bool CreatesCopy = HasLoopPHIUse(&MI); + + // Don't hoist a cheap instruction if it would create a copy in the loop. + if (CheapInstr && CreatesCopy) { + DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI); + return false; + } + + // Rematerializable instructions should always be hoisted since the register + // allocator can just pull them down again when needed. + if (TII->isTriviallyReMaterializable(&MI, AA)) + return true; + + // FIXME: If there are long latency loop-invariant instructions inside the + // loop at this point, why didn't the optimizer's LICM hoist them? 
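
CanCauseHighRegPressure above checks every pressure snapshot recorded in BackTrace (one per block from the loop header down to the current block) against the class limit, because hoisting raises the pressure in all of them. A minimal sketch of that per-set check, with the limit and snapshots as plain integers:

#include <cassert>
#include <vector>

// BackTrace holds one pressure snapshot per block from the loop header down
// to the current block; hoisting adds Delta to all of them, so the question
// is whether any snapshot would then meet or exceed the class limit.
static bool wouldExceedLimit(const std::vector<unsigned> &BackTrace, int Delta,
                             int Limit) {
  if (Delta <= 0)
    return false; // hoisting does not increase pressure for this set
  for (unsigned P : BackTrace)
    if (static_cast<int>(P) + Delta >= Limit)
      return true;
  return false;
}

int main() {
  std::vector<unsigned> BackTrace = {3, 5, 4}; // header .. current block
  assert(!wouldExceedLimit(BackTrace, 1, 8));  // 5 + 1 < 8: acceptable
  assert(wouldExceedLimit(BackTrace, 3, 8));   // 5 + 3 >= 8: too much
}
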
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || MO.isImplicit()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) { + DEBUG(dbgs() << "Hoist High Latency: " << MI); + ++NumHighLatency; + return true; + } + } + + // Estimate register pressure to determine whether to LICM the instruction. + // In low register pressure situation, we can be more aggressive about + // hoisting. Also, favors hoisting long latency instructions even in + // moderately high pressure situation. + // Cheap instructions will only be hoisted if they don't increase register + // pressure at all. + auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false, + /*ConsiderUnseenAsDef=*/false); + + // Visit BBs from header to current BB, if hoisting this doesn't cause + // high register pressure, then it's safe to proceed. + if (!CanCauseHighRegPressure(Cost, CheapInstr)) { + DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI); + ++NumLowRP; + return true; + } + + // Don't risk increasing register pressure if it would create copies. + if (CreatesCopy) { + DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI); + return false; + } + + // Do not "speculate" in high register pressure situation. If an + // instruction is not guaranteed to be executed in the loop, it's best to be + // conservative. + if (AvoidSpeculation && + (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) { + DEBUG(dbgs() << "Won't speculate: " << MI); + return false; + } + + // High register pressure situation, only hoist if the instruction is going + // to be remat'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA) && + !MI.isInvariantLoad(AA)) { + DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI); + return false; + } + + return true; +} + +/// Unfold a load from the given machineinstr if the load itself could be +/// hoisted. Return the unfolded and hoistable load, or null if the load +/// couldn't be unfolded or if it wouldn't be hoistable. +MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { + // Don't unfold simple loads. + if (MI->canFoldAsLoad()) + return nullptr; + + // If not, we may be able to unfold a load and hoist that. + // First test whether the instruction is loading from an amenable + // memory location. + if (!MI->isInvariantLoad(AA)) + return nullptr; + + // Next determine the register class for a temporary register. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc == 0) return nullptr; + const MCInstrDesc &MID = TII->get(NewOpc); + if (MID.getNumDefs() != 1) return nullptr; + MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF); + // Ok, we're unfolding. Create a temporary register and do the unfold. 
+ unsigned Reg = MRI->createVirtualRegister(RC); + + SmallVector<MachineInstr *, 2> NewMIs; + bool Success = + TII->unfoldMemoryOperand(MF, MI, Reg, + /*UnfoldLoad=*/true, /*UnfoldStore=*/false, + NewMIs); + (void)Success; + assert(Success && + "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold " + "succeeded!"); + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::iterator Pos = MI; + MBB->insert(Pos, NewMIs[0]); + MBB->insert(Pos, NewMIs[1]); + // If unfolding produced a load that wasn't loop-invariant or profitable to + // hoist, discard the new instructions and bail. + if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) { + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + return nullptr; + } + + // Update register pressure for the unfolded instruction. + UpdateRegPressure(NewMIs[1]); + + // Otherwise we successfully unfolded a load that we can hoist. + MI->eraseFromParent(); + return NewMIs[0]; +} + +/// Initialize the CSE map with instructions that are in the current loop +/// preheader that may become duplicates of instructions that are hoisted +/// out of the loop. +void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { + for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { + const MachineInstr *MI = &*I; + unsigned Opcode = MI->getOpcode(); + CSEMap[Opcode].push_back(MI); + } +} + +/// Find an instruction amount PrevMIs that is a duplicate of MI. +/// Return this instruction if it's found. +const MachineInstr* +MachineLICM::LookForDuplicate(const MachineInstr *MI, + std::vector<const MachineInstr*> &PrevMIs) { + for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) { + const MachineInstr *PrevMI = PrevMIs[i]; + if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr))) + return PrevMI; + } + return nullptr; +} + +/// Given a LICM'ed instruction, look for an instruction on the preheader that +/// computes the same value. If it's found, do a RAU on with the definition of +/// the existing instruction rather than hoisting the instruction to the +/// preheader. +bool MachineLICM::EliminateCSE(MachineInstr *MI, + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) { + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == CSEMap.end() || MI->isImplicitDef()) + return false; + + if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) { + DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup); + + // Replace virtual registers defined by MI by their counterparts defined + // by Dup. + SmallVector<unsigned, 2> Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + + // Physical registers may not differ here. 
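
InitCSEMap and LookForDuplicate above implement a small opcode-keyed CSE table: hoisted and preheader instructions are bucketed by opcode, and a candidate is only compared against instructions that could plausibly produce the same value. A self-contained sketch of that table with a toy instruction type standing in for produceSameValue:

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Toy stand-in for a machine instruction: an opcode plus its operands.
struct Instr {
  unsigned Opcode;
  std::vector<std::string> Ops;
};

static bool producesSameValue(const Instr &A, const Instr &B) {
  return A.Opcode == B.Opcode && A.Ops == B.Ops;
}

// Opcode-keyed CSE map: candidates are bucketed by opcode so a lookup only
// compares against instructions that could possibly be duplicates.
using CSEMapTy = std::map<unsigned, std::vector<const Instr *>>;

static const Instr *lookForDuplicate(const CSEMapTy &CSEMap, const Instr &MI) {
  auto CI = CSEMap.find(MI.Opcode);
  if (CI == CSEMap.end())
    return nullptr;
  for (const Instr *Prev : CI->second)
    if (producesSameValue(MI, *Prev))
      return Prev;
  return nullptr;
}

int main() {
  Instr A{1, {"x", "y"}}, B{1, {"x", "y"}}, C{2, {"x"}};
  CSEMapTy CSEMap;
  CSEMap[A.Opcode].push_back(&A); // seed with what the preheader already has
  assert(lookForDuplicate(CSEMap, B) == &A); // same value: reuse A, drop B
  assert(lookForDuplicate(CSEMap, C) == nullptr);
}
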
+ assert((!MO.isReg() || MO.getReg() == 0 || + !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) || + MO.getReg() == Dup->getOperand(i).getReg()) && + "Instructions with different phys regs are not identical!"); + + if (MO.isReg() && MO.isDef() && + !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + Defs.push_back(i); + } + + SmallVector<const TargetRegisterClass*, 2> OrigRCs; + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + OrigRCs.push_back(MRI->getRegClass(DupReg)); + + if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) { + // Restore old RCs if more than one defs. + for (unsigned j = 0; j != i; ++j) + MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]); + return false; + } + } + + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Idx = Defs[i]; + unsigned Reg = MI->getOperand(Idx).getReg(); + unsigned DupReg = Dup->getOperand(Idx).getReg(); + MRI->replaceRegWith(Reg, DupReg); + MRI->clearKillFlags(DupReg); + } + + MI->eraseFromParent(); + ++NumCSEed; + return true; + } + return false; +} + +/// Return true if the given instruction will be CSE'd if it's hoisted out of +/// the loop. +bool MachineLICM::MayCSE(MachineInstr *MI) { + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate + // the undef property onto uses. + if (CI == CSEMap.end() || MI->isImplicitDef()) + return false; + + return LookForDuplicate(MI, CI->second) != nullptr; +} + +/// When an instruction is found to use only loop invariant operands +/// that are safe to hoist, this instruction is called to do the dirty work. +/// It returns true if the instruction is hoisted. +bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { + // First check whether we should hoist this instruction. + if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { + // If not, try unfolding a hoistable load. + MI = ExtractHoistableLoad(MI); + if (!MI) return false; + } + + // Now move the instructions to the predecessor, inserting it before any + // terminator instructions. + DEBUG({ + dbgs() << "Hoisting " << *MI; + if (Preheader->getBasicBlock()) + dbgs() << " to MachineBasicBlock " + << Preheader->getName(); + if (MI->getParent()->getBasicBlock()) + dbgs() << " from MachineBasicBlock " + << MI->getParent()->getName(); + dbgs() << "\n"; + }); + + // If this is the first instruction being hoisted to the preheader, + // initialize the CSE map with potential common expressions. + if (FirstInLoop) { + InitCSEMap(Preheader); + FirstInLoop = false; + } + + // Look for opportunity to CSE the hoisted instruction. + unsigned Opcode = MI->getOpcode(); + DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator + CI = CSEMap.find(Opcode); + if (!EliminateCSE(MI, CI)) { + // Otherwise, splice the instruction to the preheader. + Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); + + // Update register pressure for BBs from header to this block. + UpdateBackTraceRegPressure(MI); + + // Clear the kill flags of any register this instruction defines, + // since they may need to be live throughout the entire loop + // rather than just live for part of it. 
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef() && !MO.isDead()) + MRI->clearKillFlags(MO.getReg()); + } + + // Add to the CSE map. + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else + CSEMap[Opcode].push_back(MI); + } + + ++NumHoisted; + Changed = true; + + return true; +} + +/// Get the preheader for the current loop, splitting a critical edge if needed. +MachineBasicBlock *MachineLICM::getCurPreheader() { + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop predecessor, we can't do any hoisting. + + // If we've tried to get a preheader and failed, don't try again. + if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1)) + return nullptr; + + if (!CurPreheader) { + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) { + MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); + if (!Pred) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return nullptr; + } + + CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); + if (!CurPreheader) { + CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1); + return nullptr; + } + } + } + return CurPreheader; +} diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp new file mode 100644 index 0000000..2f5c9e0 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp @@ -0,0 +1,83 @@ +//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachineLoopInfo class that is used to identify natural +// loops and determine the loop depth of various nodes of the CFG. Note that +// the loops identified may actually be several natural loops that share the +// same header node... not just a single natural loop. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/Analysis/LoopInfoImpl.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +// Explicitly instantiate methods in LoopInfoImpl.h for MI-level Loops. 
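
getCurPreheader earlier in this hunk caches its result and uses the address value -1 as a "tried and failed" sentinel so a missing preheader is not recomputed on every query. A small sketch of that caching idiom with a hypothetical PreheaderCache helper (not an LLVM type):

#include <cassert>

struct Block { int Id; };

// Cache a lazily computed Block*, distinguishing "not computed yet" (null)
// from "computed and known to be unavailable" (a sentinel address), so a
// failed lookup is not retried on every call.
struct PreheaderCache {
  Block *Cached = nullptr;
  static Block *sentinel() { return reinterpret_cast<Block *>(-1); }

  template <typename ComputeFn> Block *get(ComputeFn Compute) {
    if (Cached == sentinel())
      return nullptr;          // already tried and failed
    if (!Cached) {
      Cached = Compute();      // first request: compute it
      if (!Cached) {
        Cached = sentinel();   // remember the failure
        return nullptr;
      }
    }
    return Cached;
  }
};

int main() {
  Block B{7};
  int Calls = 0;
  PreheaderCache Ok, Fail;
  assert(Ok.get([&] { ++Calls; return &B; }) == &B);
  assert(Ok.get([&] { ++Calls; return &B; }) == &B); // served from the cache
  assert(Calls == 1);
  assert(Fail.get([&]() -> Block * { ++Calls; return nullptr; }) == nullptr);
  assert(Fail.get([&]() -> Block * { ++Calls; return nullptr; }) == nullptr);
  assert(Calls == 2); // the failing computation ran only once
}
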
+template class llvm::LoopBase<MachineBasicBlock, MachineLoop>; +template class llvm::LoopInfoBase<MachineBasicBlock, MachineLoop>; + +char MachineLoopInfo::ID = 0; +INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops", + "Machine Natural Loop Construction", true, true) + +char &llvm::MachineLoopInfoID = MachineLoopInfo::ID; + +bool MachineLoopInfo::runOnMachineFunction(MachineFunction &) { + releaseMemory(); + LI.analyze(getAnalysis<MachineDominatorTree>().getBase()); + return false; +} + +void MachineLoopInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachineBasicBlock *MachineLoop::getTopBlock() { + MachineBasicBlock *TopMBB = getHeader(); + MachineFunction::iterator Begin = TopMBB->getParent()->begin(); + if (TopMBB != Begin) { + MachineBasicBlock *PriorMBB = &*std::prev(TopMBB->getIterator()); + while (contains(PriorMBB)) { + TopMBB = PriorMBB; + if (TopMBB == Begin) break; + PriorMBB = &*std::prev(TopMBB->getIterator()); + } + } + return TopMBB; +} + +MachineBasicBlock *MachineLoop::getBottomBlock() { + MachineBasicBlock *BotMBB = getHeader(); + MachineFunction::iterator End = BotMBB->getParent()->end(); + if (BotMBB != std::prev(End)) { + MachineBasicBlock *NextMBB = &*std::next(BotMBB->getIterator()); + while (contains(NextMBB)) { + BotMBB = NextMBB; + if (BotMBB == &*std::next(BotMBB->getIterator())) + break; + NextMBB = &*std::next(BotMBB->getIterator()); + } + } + return BotMBB; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineLoop::dump() const { + print(dbgs()); +} +#endif diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp new file mode 100644 index 0000000..1956a70 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -0,0 +1,462 @@ +//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/ADT/TinyPtrVector.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +using namespace llvm; +using namespace llvm::dwarf; + +// Handle the Pass registration stuff necessary to use DataLayout's. +INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo", + "Machine Module Information", false, false) +char MachineModuleInfo::ID = 0; + +// Out of line virtual method. 
+MachineModuleInfoImpl::~MachineModuleInfoImpl() {} + +namespace llvm { +class MMIAddrLabelMapCallbackPtr final : CallbackVH { + MMIAddrLabelMap *Map; +public: + MMIAddrLabelMapCallbackPtr() : Map(nullptr) {} + MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(nullptr) {} + + void setPtr(BasicBlock *BB) { + ValueHandleBase::operator=(BB); + } + + void setMap(MMIAddrLabelMap *map) { Map = map; } + + void deleted() override; + void allUsesReplacedWith(Value *V2) override; +}; + +class MMIAddrLabelMap { + MCContext &Context; + struct AddrLabelSymEntry { + /// Symbols - The symbols for the label. + TinyPtrVector<MCSymbol *> Symbols; + + Function *Fn; // The containing function of the BasicBlock. + unsigned Index; // The index in BBCallbacks for the BasicBlock. + }; + + DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols; + + /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We + /// use this so we get notified if a block is deleted or RAUWd. + std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks; + + /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols + /// whose corresponding BasicBlock got deleted. These symbols need to be + /// emitted at some point in the file, so AsmPrinter emits them after the + /// function body. + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> > + DeletedAddrLabelsNeedingEmission; +public: + + MMIAddrLabelMap(MCContext &context) : Context(context) {} + ~MMIAddrLabelMap() { + assert(DeletedAddrLabelsNeedingEmission.empty() && + "Some labels for deleted blocks never got emitted"); + } + + ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(BasicBlock *BB); + + void takeDeletedSymbolsForFunction(Function *F, + std::vector<MCSymbol*> &Result); + + void UpdateForDeletedBlock(BasicBlock *BB); + void UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New); +}; +} + +ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) { + assert(BB->hasAddressTaken() && + "Shouldn't get label for block without address taken"); + AddrLabelSymEntry &Entry = AddrLabelSymbols[BB]; + + // If we already had an entry for this block, just return it. + if (!Entry.Symbols.empty()) { + assert(BB->getParent() == Entry.Fn && "Parent changed"); + return Entry.Symbols; + } + + // Otherwise, this is a new entry, create a new symbol for it and add an + // entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd. + BBCallbacks.emplace_back(BB); + BBCallbacks.back().setMap(this); + Entry.Index = BBCallbacks.size() - 1; + Entry.Fn = BB->getParent(); + Entry.Symbols.push_back(Context.createTempSymbol()); + return Entry.Symbols; +} + +/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return +/// them. +void MMIAddrLabelMap:: +takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) { + DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I = + DeletedAddrLabelsNeedingEmission.find(F); + + // If there are no entries for the function, just return. + if (I == DeletedAddrLabelsNeedingEmission.end()) return; + + // Otherwise, take the list. + std::swap(Result, I->second); + DeletedAddrLabelsNeedingEmission.erase(I); +} + + +void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) { + // If the block got deleted, there is no need for the symbol. If the symbol + // was already emitted, we can just forget about it, otherwise we need to + // queue it up for later emission when the function is output. 
+ AddrLabelSymEntry Entry = std::move(AddrLabelSymbols[BB]); + AddrLabelSymbols.erase(BB); + assert(!Entry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + BBCallbacks[Entry.Index] = nullptr; // Clear the callback. + + assert((BB->getParent() == nullptr || BB->getParent() == Entry.Fn) && + "Block/parent mismatch"); + + for (MCSymbol *Sym : Entry.Symbols) { + if (Sym->isDefined()) + return; + + // If the block is not yet defined, we need to emit it at the end of the + // function. Add the symbol to the DeletedAddrLabelsNeedingEmission list + // for the containing Function. Since the block is being deleted, its + // parent may already be removed, we have to get the function from 'Entry'. + DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym); + } +} + +void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) { + // Get the entry for the RAUW'd block and remove it from our map. + AddrLabelSymEntry OldEntry = std::move(AddrLabelSymbols[Old]); + AddrLabelSymbols.erase(Old); + assert(!OldEntry.Symbols.empty() && "Didn't have a symbol, why a callback?"); + + AddrLabelSymEntry &NewEntry = AddrLabelSymbols[New]; + + // If New is not address taken, just move our symbol over to it. + if (NewEntry.Symbols.empty()) { + BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback. + NewEntry = std::move(OldEntry); // Set New's entry. + return; + } + + BBCallbacks[OldEntry.Index] = nullptr; // Update the callback. + + // Otherwise, we need to add the old symbols to the new block's set. + NewEntry.Symbols.insert(NewEntry.Symbols.end(), OldEntry.Symbols.begin(), + OldEntry.Symbols.end()); +} + + +void MMIAddrLabelMapCallbackPtr::deleted() { + Map->UpdateForDeletedBlock(cast<BasicBlock>(getValPtr())); +} + +void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { + Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2)); +} + + +//===----------------------------------------------------------------------===// + +MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, + const MCRegisterInfo &MRI, + const MCObjectFileInfo *MOFI) + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) { + initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); +} + +MachineModuleInfo::MachineModuleInfo() + : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) { + llvm_unreachable("This MachineModuleInfo constructor should never be called, " + "MMI should always be explicitly constructed by " + "LLVMTargetMachine"); +} + +MachineModuleInfo::~MachineModuleInfo() { +} + +bool MachineModuleInfo::doInitialization(Module &M) { + + ObjFileMMI = nullptr; + CurCallSite = 0; + CallsEHReturn = false; + CallsUnwindInit = false; + HasEHFunclets = false; + DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false; + PersonalityTypeCache = EHPersonality::Unknown; + AddrLabelSymbols = nullptr; + TheModule = nullptr; + + return false; +} + +bool MachineModuleInfo::doFinalization(Module &M) { + + Personalities.clear(); + + delete AddrLabelSymbols; + AddrLabelSymbols = nullptr; + + Context.reset(); + + delete ObjFileMMI; + ObjFileMMI = nullptr; + + return false; +} + +/// EndFunction - Discard function meta information. +/// +void MachineModuleInfo::EndFunction() { + // Clean up frame info. + FrameInstructions.clear(); + + // Clean up exception info. 
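
UpdateForRAUWBlock above handles a block being replaced: if the replacement has no address-taken symbols yet, the whole entry simply moves over, otherwise the old block's symbols are appended to the new block's list. A sketch of that merge on a plain map keyed by block name (toy types only):

#include <cassert>
#include <map>
#include <string>
#include <vector>

using SymMap = std::map<std::string, std::vector<std::string>>;

// When block Old is RAUW'd to New: if New has no symbols yet, the whole entry
// just moves over; otherwise Old's symbols are appended to New's list.
static void updateForRAUW(SymMap &M, const std::string &Old,
                          const std::string &New) {
  std::vector<std::string> OldSyms = std::move(M[Old]);
  M.erase(Old);
  std::vector<std::string> &NewSyms = M[New];
  if (NewSyms.empty())
    NewSyms = std::move(OldSyms);
  else
    NewSyms.insert(NewSyms.end(), OldSyms.begin(), OldSyms.end());
}

int main() {
  SymMap M;
  M["bb1"] = {"Ltmp0"};
  M["bb2"] = {"Ltmp1"};
  updateForRAUW(M, "bb1", "bb2"); // merge: bb2 keeps both labels
  assert((M["bb2"] == std::vector<std::string>{"Ltmp1", "Ltmp0"}));
  updateForRAUW(M, "bb2", "bb3"); // plain move: bb3 had no symbols yet
  assert(M["bb3"].size() == 2 && M.count("bb2") == 0);
}
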
+ LandingPads.clear(); + PersonalityTypeCache = EHPersonality::Unknown; + CallSiteMap.clear(); + TypeInfos.clear(); + FilterIds.clear(); + FilterEnds.clear(); + CallsEHReturn = false; + CallsUnwindInit = false; + HasEHFunclets = false; + VariableDbgInfos.clear(); +} + +//===- Address of Block Management ----------------------------------------===// + +/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified +/// basic block when its address is taken. If other blocks were RAUW'd to +/// this one, we may have to emit them as well, return the whole set. +ArrayRef<MCSymbol *> +MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) { + // Lazily create AddrLabelSymbols. + if (!AddrLabelSymbols) + AddrLabelSymbols = new MMIAddrLabelMap(Context); + return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB)); +} + + +/// takeDeletedSymbolsForFunction - If the specified function has had any +/// references to address-taken blocks generated, but the block got deleted, +/// return the symbol now so we can emit it. This prevents emitting a +/// reference to a symbol that has no definition. +void MachineModuleInfo:: +takeDeletedSymbolsForFunction(const Function *F, + std::vector<MCSymbol*> &Result) { + // If no blocks have had their addresses taken, we're done. + if (!AddrLabelSymbols) return; + return AddrLabelSymbols-> + takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result); +} + +//===- EH -----------------------------------------------------------------===// + +/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the +/// specified MachineBasicBlock. +LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo + (MachineBasicBlock *LandingPad) { + unsigned N = LandingPads.size(); + for (unsigned i = 0; i < N; ++i) { + LandingPadInfo &LP = LandingPads[i]; + if (LP.LandingPadBlock == LandingPad) + return LP; + } + + LandingPads.push_back(LandingPadInfo(LandingPad)); + return LandingPads[N]; +} + +/// addInvoke - Provide the begin and end labels of an invoke style call and +/// associate it with a try landing pad block. +void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad, + MCSymbol *BeginLabel, MCSymbol *EndLabel) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.BeginLabels.push_back(BeginLabel); + LP.EndLabels.push_back(EndLabel); +} + +/// addLandingPad - Provide the label of a try LandingPad block. +/// +MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) { + MCSymbol *LandingPadLabel = Context.createTempSymbol(); + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.LandingPadLabel = LandingPadLabel; + return LandingPadLabel; +} + +void MachineModuleInfo::addPersonality(const Function *Personality) { + for (unsigned i = 0; i < Personalities.size(); ++i) + if (Personalities[i] == Personality) + return; + Personalities.push_back(Personality); +} + +/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad. +/// +void MachineModuleInfo:: +addCatchTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + for (unsigned N = TyInfo.size(); N; --N) + LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1])); +} + +/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad. 
+/// +void MachineModuleInfo:: +addFilterTypeInfo(MachineBasicBlock *LandingPad, + ArrayRef<const GlobalValue *> TyInfo) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + std::vector<unsigned> IdsInFilter(TyInfo.size()); + for (unsigned I = 0, E = TyInfo.size(); I != E; ++I) + IdsInFilter[I] = getTypeIDFor(TyInfo[I]); + LP.TypeIds.push_back(getFilterIDFor(IdsInFilter)); +} + +/// addCleanup - Add a cleanup action for a landing pad. +/// +void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + LP.TypeIds.push_back(0); +} + +void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad, + const Function *Filter, + const BlockAddress *RecoverBA) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Filter; + Handler.RecoverBA = RecoverBA; + LP.SEHHandlers.push_back(Handler); +} + +void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad, + const Function *Cleanup) { + LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad); + SEHHandler Handler; + Handler.FilterOrFinally = Cleanup; + Handler.RecoverBA = nullptr; + LP.SEHHandlers.push_back(Handler); +} + +/// TidyLandingPads - Remap landing pad labels and remove any deleted landing +/// pads. +void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) { + for (unsigned i = 0; i != LandingPads.size(); ) { + LandingPadInfo &LandingPad = LandingPads[i]; + if (LandingPad.LandingPadLabel && + !LandingPad.LandingPadLabel->isDefined() && + (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0)) + LandingPad.LandingPadLabel = nullptr; + + // Special case: we *should* emit LPs with null LP MBB. This indicates + // "nounwind" case. + if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) { + MCSymbol *BeginLabel = LandingPad.BeginLabels[j]; + MCSymbol *EndLabel = LandingPad.EndLabels[j]; + if ((BeginLabel->isDefined() || + (LPMap && (*LPMap)[BeginLabel] != 0)) && + (EndLabel->isDefined() || + (LPMap && (*LPMap)[EndLabel] != 0))) continue; + + LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j); + LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j); + --j, --e; + } + + // Remove landing pads with no try-ranges. + if (LandingPads[i].BeginLabels.empty()) { + LandingPads.erase(LandingPads.begin() + i); + continue; + } + + // If there is no landing pad, ensure that the list of typeids is empty. + // If the only typeid is a cleanup, this is the same as having no typeids. + if (!LandingPad.LandingPadBlock || + (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0])) + LandingPad.TypeIds.clear(); + ++i; + } +} + +/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site +/// indexes. +void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym, + ArrayRef<unsigned> Sites) { + LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end()); +} + +/// getTypeIDFor - Return the type id for the specified typeinfo. This is +/// function wide. +unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) { + for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i) + if (TypeInfos[i] == TI) return i + 1; + + TypeInfos.push_back(TI); + return TypeInfos.size(); +} + +/// getFilterIDFor - Return the filter id for the specified typeinfos. This is +/// function wide. 
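
TidyLandingPads above edits its vectors in place while scanning them, advancing the index only when the current element is kept (and stepping j and e back after erasing a label pair). The same erase-while-iterating pattern in isolation, on a plain vector of ints:

#include <cassert>
#include <vector>

// Remove elements from a vector in place while walking it by index: only
// advance the index when the current element is kept.
static void eraseIf(std::vector<int> &V, bool (*Pred)(int)) {
  for (unsigned i = 0; i != V.size(); ) {
    if (Pred(V[i])) {
      V.erase(V.begin() + i); // V shrinks; i now names the next element
      continue;
    }
    ++i;
  }
}

int main() {
  std::vector<int> V = {1, 2, 3, 4, 5, 6};
  eraseIf(V, [](int x) { return x % 2 == 0; });
  assert((V == std::vector<int>{1, 3, 5}));
}
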
+int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) { + // If the new filter coincides with the tail of an existing filter, then + // re-use the existing filter. Folding filters more than this requires + // re-ordering filters and/or their elements - probably not worth it. + for (std::vector<unsigned>::iterator I = FilterEnds.begin(), + E = FilterEnds.end(); I != E; ++I) { + unsigned i = *I, j = TyIds.size(); + + while (i && j) + if (FilterIds[--i] != TyIds[--j]) + goto try_next; + + if (!j) + // The new filter coincides with range [i, end) of the existing filter. + return -(1 + i); + +try_next:; + } + + // Add the new filter. + int FilterID = -(1 + FilterIds.size()); + FilterIds.reserve(FilterIds.size() + TyIds.size() + 1); + FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end()); + FilterEnds.push_back(FilterIds.size()); + FilterIds.push_back(0); // terminator + return FilterID; +} diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp new file mode 100644 index 0000000..22d519e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfoImpls.cpp @@ -0,0 +1,44 @@ +//===-- llvm/CodeGen/MachineModuleInfoImpls.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements object-file format specific implementations of +// MachineModuleInfoImpl. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// MachineModuleInfoMachO +//===----------------------------------------------------------------------===// + +// Out of line virtual method. +void MachineModuleInfoMachO::anchor() {} +void MachineModuleInfoELF::anchor() {} + +static int SortSymbolPair(const void *LHS, const void *RHS) { + typedef std::pair<MCSymbol*, MachineModuleInfoImpl::StubValueTy> PairTy; + const MCSymbol *LHSS = ((const PairTy *)LHS)->first; + const MCSymbol *RHSS = ((const PairTy *)RHS)->first; + return LHSS->getName().compare(RHSS->getName()); +} + +MachineModuleInfoImpl::SymbolListTy MachineModuleInfoImpl::getSortedStubs( + DenseMap<MCSymbol *, MachineModuleInfoImpl::StubValueTy> &Map) { + MachineModuleInfoImpl::SymbolListTy List(Map.begin(), Map.end()); + + if (!List.empty()) + qsort(&List[0], List.size(), sizeof(List[0]), SortSymbolPair); + + Map.clear(); + return List; +} + diff --git a/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp new file mode 100644 index 0000000..3ee3e40 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachinePassRegistry.cpp @@ -0,0 +1,55 @@ +//===-- CodeGen/MachineInstr.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the machine function pass registry for register allocators +// and instruction schedulers. 
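
getFilterIDFor earlier in this patch stores all filters back to back in FilterIds, each terminated by 0, and hands out the negative encoding -(1 + offset) as the filter id; matching tails of existing filters can be reused. The sketch below shows the flat encoding and its decoding with a hypothetical FilterTable helper (tail sharing omitted for brevity):

#include <cassert>
#include <vector>

// Filters are stored back to back in one flat array, each terminated by 0
// (type ids are 1-based, so 0 cannot be a real entry). A filter's id is the
// negative encoding -(1 + offset) of where it starts in the array.
struct FilterTable {
  std::vector<unsigned> FilterIds;

  int add(const std::vector<unsigned> &TyIds) {
    int FilterID = -(1 + static_cast<int>(FilterIds.size()));
    FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
    FilterIds.push_back(0); // terminator
    return FilterID;
  }

  std::vector<unsigned> get(int FilterID) const {
    std::vector<unsigned> Out;
    for (unsigned i = static_cast<unsigned>(-FilterID - 1); FilterIds[i]; ++i)
      Out.push_back(FilterIds[i]);
    return Out;
  }
};

int main() {
  FilterTable T;
  int F1 = T.add({3, 1});
  int F2 = T.add({2});
  assert(F1 == -1 && F2 == -4); // offsets 0 and 3, negatively encoded
  assert((T.get(F1) == std::vector<unsigned>{3, 1}));
  assert((T.get(F2) == std::vector<unsigned>{2}));
}
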
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePassRegistry.h" + +using namespace llvm; + +void MachinePassRegistryListener::anchor() { } + +/// setDefault - Set the default constructor by name. +void MachinePassRegistry::setDefault(StringRef Name) { + MachinePassCtor Ctor = nullptr; + for(MachinePassRegistryNode *R = getList(); R; R = R->getNext()) { + if (R->getName() == Name) { + Ctor = R->getCtor(); + break; + } + } + assert(Ctor && "Unregistered pass name"); + setDefault(Ctor); +} + +/// Add - Adds a function pass to the registration list. +/// +void MachinePassRegistry::Add(MachinePassRegistryNode *Node) { + Node->setNext(List); + List = Node; + if (Listener) Listener->NotifyAdd(Node->getName(), + Node->getCtor(), + Node->getDescription()); +} + + +/// Remove - Removes a function pass from the registration list. +/// +void MachinePassRegistry::Remove(MachinePassRegistryNode *Node) { + for (MachinePassRegistryNode **I = &List; *I; I = (*I)->getNextAddress()) { + if (*I == Node) { + if (Listener) Listener->NotifyRemove(Node->getName()); + *I = (*I)->getNext(); + break; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp new file mode 100644 index 0000000..c3f6e92 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachinePostDominators.cpp @@ -0,0 +1,55 @@ +//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements simple dominator construction algorithms for finding +// post dominators on machine functions. 
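
MachinePassRegistry above keeps its nodes on an intrusive singly linked list: Add pushes at the head, and Remove walks a pointer-to-pointer so unlinking the head needs no special case. A stand-alone sketch of that list handling with a toy node type:

#include <cassert>
#include <string>

struct Node {
  std::string Name;
  Node *Next;
};

// Intrusive singly linked registry: push-front insertion, and removal via a
// pointer to the link itself so removing the head is not a special case.
struct Registry {
  Node *List = nullptr;

  void add(Node *N) {
    N->Next = List;
    List = N;
  }

  void remove(Node *N) {
    for (Node **I = &List; *I; I = &(*I)->Next)
      if (*I == N) {
        *I = N->Next; // splice N out by rewriting whichever link pointed at it
        break;
      }
  }
};

int main() {
  Node A{"sched-a", nullptr}, B{"sched-b", nullptr}, C{"sched-c", nullptr};
  Registry R;
  R.add(&A); R.add(&B); R.add(&C); // list is now C -> B -> A
  R.remove(&B);                    // middle removal, no prev pointer needed
  assert(R.List == &C && C.Next == &A && A.Next == nullptr);
  R.remove(&C);                    // head removal handled by the same loop
  assert(R.List == &A);
}
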
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachinePostDominators.h" + +using namespace llvm; + +char MachinePostDominatorTree::ID = 0; + +//declare initializeMachinePostDominatorTreePass +INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree", + "MachinePostDominator Tree Construction", true, true) + +MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) { + initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry()); + DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate + // postdominator +} + +FunctionPass * +MachinePostDominatorTree::createMachinePostDominatorTreePass() { + return new MachinePostDominatorTree(); +} + +bool +MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { + DT->recalculate(F); + return false; +} + +MachinePostDominatorTree::~MachinePostDominatorTree() { + delete DT; +} + +void +MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void +MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const { + DT->print(OS); +} diff --git a/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp new file mode 100644 index 0000000..01d2c2e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineRegionInfo.cpp @@ -0,0 +1,140 @@ + +#include "llvm/CodeGen/MachineRegionInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/RegionInfoImpl.h" +#include "llvm/CodeGen/MachinePostDominators.h" + +#define DEBUG_TYPE "region" + +using namespace llvm; + +STATISTIC(numMachineRegions, "The # of machine regions"); +STATISTIC(numMachineSimpleRegions, "The # of simple machine regions"); + +namespace llvm { +template class RegionBase<RegionTraits<MachineFunction>>; +template class RegionNodeBase<RegionTraits<MachineFunction>>; +template class RegionInfoBase<RegionTraits<MachineFunction>>; +} + +//===----------------------------------------------------------------------===// +// MachineRegion implementation +// + +MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit, + MachineRegionInfo* RI, + MachineDominatorTree *DT, MachineRegion *Parent) : + RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) { + +} + +MachineRegion::~MachineRegion() { } + +//===----------------------------------------------------------------------===// +// MachineRegionInfo implementation +// + +MachineRegionInfo::MachineRegionInfo() : + RegionInfoBase<RegionTraits<MachineFunction>>() { + +} + +MachineRegionInfo::~MachineRegionInfo() { + +} + +void MachineRegionInfo::updateStatistics(MachineRegion *R) { + ++numMachineRegions; + + // TODO: Slow. Should only be enabled if -stats is used. 
+ if (R->isSimple()) + ++numMachineSimpleRegions; +} + +void MachineRegionInfo::recalculate(MachineFunction &F, + MachineDominatorTree *DT_, + MachinePostDominatorTree *PDT_, + MachineDominanceFrontier *DF_) { + DT = DT_; + PDT = PDT_; + DF = DF_; + + MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&F); + + TopLevelRegion = new MachineRegion(Entry, nullptr, this, DT, nullptr); + updateStatistics(TopLevelRegion); + calculate(F); +} + +//===----------------------------------------------------------------------===// +// MachineRegionInfoPass implementation +// + +MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) { + initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry()); +} + +MachineRegionInfoPass::~MachineRegionInfoPass() { + +} + +bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) { + releaseMemory(); + + auto DT = &getAnalysis<MachineDominatorTree>(); + auto PDT = &getAnalysis<MachinePostDominatorTree>(); + auto DF = &getAnalysis<MachineDominanceFrontier>(); + + RI.recalculate(F, DT, PDT, DF); + return false; +} + +void MachineRegionInfoPass::releaseMemory() { + RI.releaseMemory(); +} + +void MachineRegionInfoPass::verifyAnalysis() const { + // Only do verification when user wants to, otherwise this expensive check + // will be invoked by PMDataManager::verifyPreservedAnalysis when + // a regionpass (marked PreservedAll) finish. + if (MachineRegionInfo::VerifyRegionInfo) + RI.verifyAnalysis(); +} + +void MachineRegionInfoPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive<DominatorTreeWrapperPass>(); + AU.addRequired<PostDominatorTree>(); + AU.addRequired<DominanceFrontier>(); +} + +void MachineRegionInfoPass::print(raw_ostream &OS, const Module *) const { + RI.print(OS); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void MachineRegionInfoPass::dump() const { + RI.dump(); +} +#endif + +char MachineRegionInfoPass::ID = 0; + +INITIALIZE_PASS_BEGIN(MachineRegionInfoPass, "regions", + "Detect single entry single exit regions", true, true) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier) +INITIALIZE_PASS_END(MachineRegionInfoPass, "regions", + "Detect single entry single exit regions", true, true) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. + +namespace llvm { + FunctionPass *createMachineRegionInfoPass() { + return new MachineRegionInfoPass(); + } +} + diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp new file mode 100644 index 0000000..03c82f4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -0,0 +1,498 @@ +//===-- lib/Codegen/MachineRegisterInfo.cpp -------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the MachineRegisterInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +// Pin the vtable to this file. +void MachineRegisterInfo::Delegate::anchor() {} + +MachineRegisterInfo::MachineRegisterInfo(const MachineFunction *MF) + : MF(MF), TheDelegate(nullptr), IsSSA(true), TracksLiveness(true), + TracksSubRegLiveness(false) { + unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); + VRegInfo.reserve(256); + RegAllocHints.reserve(256); + UsedPhysRegMask.resize(NumRegs); + PhysRegUseDefLists.reset(new MachineOperand*[NumRegs]()); +} + +/// setRegClass - Set the register class of the specified virtual register. +/// +void +MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); + VRegInfo[Reg].first = RC; +} + +const TargetRegisterClass * +MachineRegisterInfo::constrainRegClass(unsigned Reg, + const TargetRegisterClass *RC, + unsigned MinNumRegs) { + const TargetRegisterClass *OldRC = getRegClass(Reg); + if (OldRC == RC) + return RC; + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); + if (!NewRC || NewRC == OldRC) + return NewRC; + if (NewRC->getNumRegs() < MinNumRegs) + return nullptr; + setRegClass(Reg, NewRC); + return NewRC; +} + +bool +MachineRegisterInfo::recomputeRegClass(unsigned Reg) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterClass *OldRC = getRegClass(Reg); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC, *MF); + + // Stop early if there is no room to grow. + if (NewRC == OldRC) + return false; + + // Accumulate constraints from all uses. + for (MachineOperand &MO : reg_nodbg_operands(Reg)) { + // Apply the effect of the given operand to NewRC. + MachineInstr *MI = MO.getParent(); + unsigned OpNo = &MO - &MI->getOperand(0); + NewRC = MI->getRegClassConstraintEffect(OpNo, NewRC, TII, + getTargetRegisterInfo()); + if (!NewRC || NewRC == OldRC) + return false; + } + setRegClass(Reg, NewRC); + return true; +} + +/// createVirtualRegister - Create and return a new virtual register in the +/// function with the specified register class. +/// +unsigned +MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ + assert(RegClass && "Cannot create register without RegClass!"); + assert(RegClass->isAllocatable() && + "Virtual register RegClass must be allocatable."); + + // New virtual register number. + unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs()); + VRegInfo.grow(Reg); + VRegInfo[Reg].first = RegClass; + RegAllocHints.grow(Reg); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); + return Reg; +} + +/// clearVirtRegs - Remove all virtual registers (after physreg assignment). 
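
constrainRegClass above narrows a virtual register's class to the common subclass of its current class and an additional constraint, giving up if too few registers would remain. The sketch below models a register class as a plain set and the common subclass as a set intersection, which is a deliberate simplification of the real target class lattice:

#include <cassert>
#include <set>

using RegClass = std::set<unsigned>; // simplified: a class is a set of regs

// Try to constrain a register's class to also satisfy RC. Returns the new
// class, or an empty set if the result would have fewer than MinNumRegs
// registers (the caller then knows the constraint was rejected).
static RegClass constrain(const RegClass &Cur, const RegClass &RC,
                          unsigned MinNumRegs = 0) {
  RegClass Common;
  for (unsigned R : Cur)
    if (RC.count(R))
      Common.insert(R);
  if (Common.size() < MinNumRegs || Common.empty())
    return {};
  return Common;
}

int main() {
  RegClass GPR = {1, 2, 3, 4}, Even = {2, 4, 6};
  RegClass C = constrain(GPR, Even);
  assert((C == RegClass{2, 4}));
  assert(constrain(GPR, Even, 3).empty()); // too few registers left: rejected
  assert(constrain(GPR, RegClass{7, 8}).empty());
}
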
+void MachineRegisterInfo::clearVirtRegs() { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (!VRegInfo[Reg].second) + continue; + verifyUseList(Reg); + llvm_unreachable("Remaining virtual register operands"); + } +#endif + VRegInfo.clear(); + for (auto &I : LiveIns) + I.second = 0; +} + +void MachineRegisterInfo::verifyUseList(unsigned Reg) const { +#ifndef NDEBUG + bool Valid = true; + for (MachineOperand &M : reg_operands(Reg)) { + MachineOperand *MO = &M; + MachineInstr *MI = MO->getParent(); + if (!MI) { + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO + << " has no parent instruction.\n"; + Valid = false; + continue; + } + MachineOperand *MO0 = &MI->getOperand(0); + unsigned NumOps = MI->getNumOperands(); + if (!(MO >= MO0 && MO < MO0+NumOps)) { + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO + << " doesn't belong to parent MI: " << *MI; + Valid = false; + } + if (!MO->isReg()) { + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO + << " is not a register\n"; + Valid = false; + } + if (MO->getReg() != Reg) { + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " + << *MO << " is the wrong register\n"; + Valid = false; + } + } + assert(Valid && "Invalid use list"); +#endif +} + +void MachineRegisterInfo::verifyUseLists() const { +#ifndef NDEBUG + for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) + verifyUseList(TargetRegisterInfo::index2VirtReg(i)); + for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) + verifyUseList(i); +#endif +} + +/// Add MO to the linked list of operands for its register. +void MachineRegisterInfo::addRegOperandToUseList(MachineOperand *MO) { + assert(!MO->isOnRegUseList() && "Already on list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + + // Head points to the first list element. + // Next is NULL on the last list element. + // Prev pointers are circular, so Head->Prev == Last. + + // Head is NULL for an empty list. + if (!Head) { + MO->Contents.Reg.Prev = MO; + MO->Contents.Reg.Next = nullptr; + HeadRef = MO; + return; + } + assert(MO->getReg() == Head->getReg() && "Different regs on the same list!"); + + // Insert MO between Last and Head in the circular Prev chain. + MachineOperand *Last = Head->Contents.Reg.Prev; + assert(Last && "Inconsistent use list"); + assert(MO->getReg() == Last->getReg() && "Different regs on the same list!"); + Head->Contents.Reg.Prev = MO; + MO->Contents.Reg.Prev = Last; + + // Def operands always precede uses. This allows def_iterator to stop early. + // Insert def operands at the front, and use operands at the back. + if (MO->isDef()) { + // Insert def at the front. + MO->Contents.Reg.Next = Head; + HeadRef = MO; + } else { + // Insert use at the end. + MO->Contents.Reg.Next = nullptr; + Last->Contents.Reg.Next = MO; + } +} + +/// Remove MO from its use-def list. +void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { + assert(MO->isOnRegUseList() && "Operand not on use list"); + MachineOperand *&HeadRef = getRegUseDefListHead(MO->getReg()); + MachineOperand *const Head = HeadRef; + assert(Head && "List already empty"); + + // Unlink this from the doubly linked list of operands. 
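
addRegOperandToUseList above maintains an unusual hybrid list: the Next chain ends in null, the Prev chain is circular so Head->Prev is always the last node, and defs are inserted at the front while uses go to the back so def iteration can stop early. A miniature version of that structure with a toy operand type:

#include <cassert>

struct Operand {
  bool IsDef;
  Operand *Prev;
  Operand *Next;
};

// Miniature operand use list: the Next chain is null-terminated, the Prev
// chain is circular (Head->Prev is the last node), defs are inserted at the
// front and uses appended at the back.
static void addToUseList(Operand *&Head, Operand *MO) {
  if (!Head) {
    MO->Prev = MO;       // single node: circular Prev points at itself
    MO->Next = nullptr;
    Head = MO;
    return;
  }
  Operand *Last = Head->Prev;
  if (MO->IsDef) {       // def: becomes the new head
    MO->Next = Head;
    MO->Prev = Last;     // new head keeps the Head->Prev == Last invariant
    Head->Prev = MO;
    Head = MO;
  } else {               // use: becomes the new last node
    MO->Next = nullptr;
    MO->Prev = Last;
    Last->Next = MO;
    Head->Prev = MO;
  }
}

int main() {
  Operand Use1{false, nullptr, nullptr};
  Operand Def1{true, nullptr, nullptr};
  Operand Use2{false, nullptr, nullptr};
  Operand *Head = nullptr;
  addToUseList(Head, &Use1);
  addToUseList(Head, &Def1);
  addToUseList(Head, &Use2);
  // Resulting order: Def1, Use1, Use2; Head->Prev is the last element.
  assert(Head == &Def1 && Def1.Next == &Use1 && Use1.Next == &Use2);
  assert(Use2.Next == nullptr && Head->Prev == &Use2);
}
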
+ MachineOperand *Next = MO->Contents.Reg.Next; + MachineOperand *Prev = MO->Contents.Reg.Prev; + + // Prev links are circular, next link is NULL instead of looping back to Head. + if (MO == Head) + HeadRef = Next; + else + Prev->Contents.Reg.Next = Next; + + (Next ? Next : Head)->Contents.Reg.Prev = Prev; + + MO->Contents.Reg.Prev = nullptr; + MO->Contents.Reg.Next = nullptr; +} + +/// Move NumOps operands from Src to Dst, updating use-def lists as needed. +/// +/// The Dst range is assumed to be uninitialized memory. (Or it may contain +/// operands that won't be destroyed, which is OK because the MO destructor is +/// trivial anyway). +/// +/// The Src and Dst ranges may overlap. +void MachineRegisterInfo::moveOperands(MachineOperand *Dst, + MachineOperand *Src, + unsigned NumOps) { + assert(Src != Dst && NumOps && "Noop moveOperands"); + + // Copy backwards if Dst is within the Src range. + int Stride = 1; + if (Dst >= Src && Dst < Src + NumOps) { + Stride = -1; + Dst += NumOps - 1; + Src += NumOps - 1; + } + + // Copy one operand at a time. + do { + new (Dst) MachineOperand(*Src); + + // Dst takes Src's place in the use-def chain. + if (Src->isReg()) { + MachineOperand *&Head = getRegUseDefListHead(Src->getReg()); + MachineOperand *Prev = Src->Contents.Reg.Prev; + MachineOperand *Next = Src->Contents.Reg.Next; + assert(Head && "List empty, but operand is chained"); + assert(Prev && "Operand was not on use-def list"); + + // Prev links are circular, next link is NULL instead of looping back to + // Head. + if (Src == Head) + Head = Dst; + else + Prev->Contents.Reg.Next = Dst; + + // Update Prev pointer. This also works when Src was pointing to itself + // in a 1-element list. In that case Head == Dst. + (Next ? Next : Head)->Contents.Reg.Prev = Dst; + } + + Dst += Stride; + Src += Stride; + } while (--NumOps); +} + +/// replaceRegWith - Replace all instances of FromReg with ToReg in the +/// machine function. This is like llvm-level X->replaceAllUsesWith(Y), +/// except that it also changes any definitions of the register as well. +/// If ToReg is a physical register we apply the sub register to obtain the +/// final/proper physical register. +void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) { + assert(FromReg != ToReg && "Cannot replace a reg with itself"); + + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + + // TODO: This could be more efficient by bulk changing the operands. + for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) { + MachineOperand &O = *I; + ++I; + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + O.substPhysReg(ToReg, *TRI); + } else { + O.setReg(ToReg); + } + } +} + +/// getVRegDef - Return the machine instr that defines the specified virtual +/// register or null if none is found. This assumes that the code is in SSA +/// form, so there should only be one definition. +MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const { + // Since we are in SSA form, we can use the first definition. + def_instr_iterator I = def_instr_begin(Reg); + assert((I.atEnd() || std::next(I) == def_instr_end()) && + "getVRegDef assumes a single definition or no definition"); + return !I.atEnd() ? &*I : nullptr; +} + +/// getUniqueVRegDef - Return the unique machine instr that defines the +/// specified virtual register or null if none is found. If there are +/// multiple definitions or no definition, return null. 
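+
+// A minimal sketch (illustrative only, not from the upstream file) of how a
+// pass typically pairs replaceRegWith above with the kill-flag helper below;
+// `OldVReg` and `NewVReg` are hypothetical virtual registers:
+//
+//   MRI.replaceRegWith(OldVReg, NewVReg); // rewrite every def and use
+//   MRI.clearKillFlags(NewVReg);          // old kill points may no longer hold
+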
+MachineInstr *MachineRegisterInfo::getUniqueVRegDef(unsigned Reg) const {
+  if (def_empty(Reg)) return nullptr;
+  def_instr_iterator I = def_instr_begin(Reg);
+  if (std::next(I) != def_instr_end())
+    return nullptr;
+  return &*I;
+}
+
+bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
+  use_nodbg_iterator UI = use_nodbg_begin(RegNo);
+  if (UI == use_nodbg_end())
+    return false;
+  return ++UI == use_nodbg_end();
+}
+
+/// clearKillFlags - Iterate over all the uses of the given register and
+/// clear the kill flag from the MachineOperand. This function is used by
+/// optimization passes which extend register lifetimes and need only
+/// preserve conservative kill flag information.
+void MachineRegisterInfo::clearKillFlags(unsigned Reg) const {
+  for (MachineOperand &MO : use_operands(Reg))
+    MO.setIsKill(false);
+}
+
+bool MachineRegisterInfo::isLiveIn(unsigned Reg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == Reg || I->second == Reg)
+      return true;
+  return false;
+}
+
+/// getLiveInPhysReg - If VReg is a live-in virtual register, return the
+/// corresponding live-in physical register.
+unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->second == VReg)
+      return I->first;
+  return 0;
+}
+
+/// getLiveInVirtReg - If PReg is a live-in physical register, return the
+/// corresponding live-in virtual register.
+unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const {
+  for (livein_iterator I = livein_begin(), E = livein_end(); I != E; ++I)
+    if (I->first == PReg)
+      return I->second;
+  return 0;
+}
+
+/// EmitLiveInCopies - Emit copies to initialize livein virtual registers
+/// into the given entry block.
+void
+MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
+                                      const TargetRegisterInfo &TRI,
+                                      const TargetInstrInfo &TII) {
+  // Emit the copies into the top of the block.
+  for (unsigned i = 0, e = LiveIns.size(); i != e; ++i)
+    if (LiveIns[i].second) {
+      if (use_empty(LiveIns[i].second)) {
+        // The livein has no uses. Drop it.
+        //
+        // It would be preferable to have isel avoid creating live-in
+        // records for unused arguments in the first place, but it's
+        // complicated by the debug info code for arguments.
+        LiveIns.erase(LiveIns.begin() + i);
+        --i; --e;
+      } else {
+        // Emit a copy.
+        BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+                TII.get(TargetOpcode::COPY), LiveIns[i].second)
+          .addReg(LiveIns[i].first);
+
+        // Add the register to the entry block live-in set.
+        EntryMBB->addLiveIn(LiveIns[i].first);
+      }
+    } else {
+      // Add the register to the entry block live-in set.
+      EntryMBB->addLiveIn(LiveIns[i].first);
+    }
+}
+
+LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
+  // Lane masks are only defined for vregs.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const TargetRegisterClass &TRC = *getRegClass(Reg); + return TRC.getLaneMask(); +} + +#ifndef NDEBUG +void MachineRegisterInfo::dumpUses(unsigned Reg) const { + for (MachineInstr &I : use_instructions(Reg)) + I.dump(); +} +#endif + +void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && + "Invalid ReservedRegs vector from target"); +} + +bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, + const MachineFunction &MF) const { + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg)); + + // Check if any overlapping register is modified, or allocatable so it may be + // used later. + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) + if (!def_empty(*AI) || isAllocatable(*AI)) + return false; + return true; +} + +/// markUsesInDebugValueAsUndef - Mark every DBG_VALUE referencing the +/// specified register as undefined which causes the DBG_VALUE to be +/// deleted during LiveDebugVariables analysis. +void MachineRegisterInfo::markUsesInDebugValueAsUndef(unsigned Reg) const { + // Mark any DBG_VALUE that uses Reg as undef (but don't delete it.) + MachineRegisterInfo::use_instr_iterator nextI; + for (use_instr_iterator I = use_instr_begin(Reg), E = use_instr_end(); + I != E; I = nextI) { + nextI = std::next(I); // I is invalidated by the setReg + MachineInstr *UseMI = &*I; + if (UseMI->isDebugValue()) + UseMI->getOperand(0).setReg(0U); + } +} + +static const Function *getCalledFunction(const MachineInstr &MI) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isGlobal()) + continue; + const Function *Func = dyn_cast<Function>(MO.getGlobal()); + if (Func != nullptr) + return Func; + } + return nullptr; +} + +static bool isNoReturnDef(const MachineOperand &MO) { + // Anything which is not a noreturn function is a real def. + const MachineInstr &MI = *MO.getParent(); + if (!MI.isCall()) + return false; + const MachineBasicBlock &MBB = *MI.getParent(); + if (!MBB.succ_empty()) + return false; + const MachineFunction &MF = *MBB.getParent(); + // We need to keep correct unwind information even if the function will + // not return, since the runtime may need it. 
+ if (MF.getFunction()->hasFnAttribute(Attribute::UWTable)) + return false; + const Function *Called = getCalledFunction(MI); + return !(Called == nullptr || !Called->hasFnAttribute(Attribute::NoReturn) || + !Called->hasFnAttribute(Attribute::NoUnwind)); +} + +bool MachineRegisterInfo::isPhysRegModified(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) { + for (const MachineOperand &MO : make_range(def_begin(*AI), def_end())) { + if (isNoReturnDef(MO)) + continue; + return true; + } + } + return false; +} + +bool MachineRegisterInfo::isPhysRegUsed(unsigned PhysReg) const { + if (UsedPhysRegMask.test(PhysReg)) + return true; + const TargetRegisterInfo *TRI = getTargetRegisterInfo(); + for (MCRegAliasIterator AliasReg(PhysReg, TRI, true); AliasReg.isValid(); + ++AliasReg) { + if (!reg_nodbg_empty(*AliasReg)) + return true; + } + return false; +} diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp new file mode 100644 index 0000000..71a6eba --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -0,0 +1,356 @@ +//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MachineSSAUpdater class. It's based on SSAUpdater +// class in lib/Transforms/Utils. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/AlignOf.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/SSAUpdaterImpl.h" +using namespace llvm; + +#define DEBUG_TYPE "machine-ssaupdater" + +typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy; +static AvailableValsTy &getAvailableVals(void *AV) { + return *static_cast<AvailableValsTy*>(AV); +} + +MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF, + SmallVectorImpl<MachineInstr*> *NewPHI) + : AV(nullptr), InsertedPHIs(NewPHI) { + TII = MF.getSubtarget().getInstrInfo(); + MRI = &MF.getRegInfo(); +} + +MachineSSAUpdater::~MachineSSAUpdater() { + delete static_cast<AvailableValsTy*>(AV); +} + +/// Initialize - Reset this object to get ready for a new set of SSA +/// updates. ProtoValue is the value used to name PHI nodes. +void MachineSSAUpdater::Initialize(unsigned V) { + if (!AV) + AV = new AvailableValsTy(); + else + getAvailableVals(AV).clear(); + + VR = V; + VRC = MRI->getRegClass(VR); +} + +/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for +/// the specified block. 
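+
+// A minimal sketch (illustrative only, not from the upstream file) of the
+// intended calling pattern for this class, using only entry points defined in
+// this file; `OldReg`, `DefMBB`, `DefReg` and `UseMO` are hypothetical:
+//
+//   MachineSSAUpdater SSAUpdate(MF, /*NewPHI=*/nullptr);
+//   SSAUpdate.Initialize(OldReg);                 // PHIs use OldReg's class
+//   SSAUpdate.AddAvailableValue(DefMBB, DefReg);  // one call per reaching def
+//   SSAUpdate.RewriteUse(UseMO);                  // may insert PHIs as needed
+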
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const { + return getAvailableVals(AV).count(BB); +} + +/// AddAvailableValue - Indicate that a rewritten value is available in the +/// specified block with the specified value. +void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) { + getAvailableVals(AV)[BB] = V; +} + +/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is +/// live at the end of the specified block. +unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { + return GetValueAtEndOfBlockInternal(BB); +} + +static +unsigned LookForIdenticalPHI(MachineBasicBlock *BB, + SmallVectorImpl<std::pair<MachineBasicBlock*, unsigned> > &PredValues) { + if (BB->empty()) + return 0; + + MachineBasicBlock::iterator I = BB->begin(); + if (!I->isPHI()) + return 0; + + AvailableValsTy AVals; + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + AVals[PredValues[i].first] = PredValues[i].second; + while (I != BB->end() && I->isPHI()) { + bool Same = true; + for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) { + unsigned SrcReg = I->getOperand(i).getReg(); + MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB(); + if (AVals[SrcBB] != SrcReg) { + Same = false; + break; + } + } + if (Same) + return I->getOperand(0).getReg(); + ++I; + } + return 0; +} + +/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define +/// a value of the given register class at the start of the specified basic +/// block. It returns the virtual register defined by the instruction. +static +MachineInstrBuilder InsertNewDef(unsigned Opcode, + MachineBasicBlock *BB, MachineBasicBlock::iterator I, + const TargetRegisterClass *RC, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII) { + unsigned NewVR = MRI->createVirtualRegister(RC); + return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR); +} + +/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that +/// is live in the middle of the specified block. +/// +/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one +/// important case: if there is a definition of the rewritten value after the +/// 'use' in BB. Consider code like this: +/// +/// X1 = ... +/// SomeBB: +/// use(X) +/// X2 = ... +/// br Cond, SomeBB, OutBB +/// +/// In this case, there are two values (X1 and X2) added to the AvailableVals +/// set by the client of the rewriter, and those values are both live out of +/// their respective blocks. However, the use of X happens in the *middle* of +/// a block. Because of this, we need to insert a new PHI node in SomeBB to +/// merge the appropriate values, and this value isn't live out of the block. +/// +unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { + // If there is no definition of the renamed variable in this block, just use + // GetValueAtEndOfBlock to do our work. + if (!HasValueForBlock(BB)) + return GetValueAtEndOfBlockInternal(BB); + + // If there are no predecessors, just return undef. + if (BB->pred_empty()) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + VRC, MRI, TII); + return NewDef->getOperand(0).getReg(); + } + + // Otherwise, we have the hard case. Get the live-in values for each + // predecessor. 
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues; + unsigned SingularValue = 0; + + bool isFirstPred = true; + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) { + MachineBasicBlock *PredBB = *PI; + unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB); + PredValues.push_back(std::make_pair(PredBB, PredVal)); + + // Compute SingularValue. + if (isFirstPred) { + SingularValue = PredVal; + isFirstPred = false; + } else if (PredVal != SingularValue) + SingularValue = 0; + } + + // Otherwise, if all the merged values are the same, just use it. + if (SingularValue != 0) + return SingularValue; + + // If an identical PHI is already in BB, just reuse it. + unsigned DupPHI = LookForIdenticalPHI(BB, PredValues); + if (DupPHI) + return DupPHI; + + // Otherwise, we do need a PHI: insert one now. + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); + MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, + Loc, VRC, MRI, TII); + + // Fill in all the predecessors of the PHI. + for (unsigned i = 0, e = PredValues.size(); i != e; ++i) + InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first); + + // See if the PHI node can be merged to a single value. This can happen in + // loop cases when we get a PHI of itself and one other value. + if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) { + InsertedPHI->eraseFromParent(); + return ConstVal; + } + + // If the client wants to know about all new instructions, tell it. + if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); + + DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n"); + return InsertedPHI->getOperand(0).getReg(); +} + +static +MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI, + MachineOperand *U) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + if (&MI->getOperand(i) == U) + return MI->getOperand(i+1).getMBB(); + } + + llvm_unreachable("MachineOperand::getParent() failure?"); +} + +/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes, +/// which use their value in the corresponding predecessor. +void MachineSSAUpdater::RewriteUse(MachineOperand &U) { + MachineInstr *UseMI = U.getParent(); + unsigned NewVR = 0; + if (UseMI->isPHI()) { + MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U); + NewVR = GetValueAtEndOfBlockInternal(SourceBB); + } else { + NewVR = GetValueInMiddleOfBlock(UseMI->getParent()); + } + + U.setReg(NewVR); +} + +/// SSAUpdaterTraits<MachineSSAUpdater> - Traits for the SSAUpdaterImpl +/// template, specialized for MachineSSAUpdater. +namespace llvm { +template<> +class SSAUpdaterTraits<MachineSSAUpdater> { +public: + typedef MachineBasicBlock BlkT; + typedef unsigned ValT; + typedef MachineInstr PhiT; + + typedef MachineBasicBlock::succ_iterator BlkSucc_iterator; + static BlkSucc_iterator BlkSucc_begin(BlkT *BB) { return BB->succ_begin(); } + static BlkSucc_iterator BlkSucc_end(BlkT *BB) { return BB->succ_end(); } + + /// Iterator for PHI operands. 
+ class PHI_iterator { + private: + MachineInstr *PHI; + unsigned idx; + + public: + explicit PHI_iterator(MachineInstr *P) // begin iterator + : PHI(P), idx(1) {} + PHI_iterator(MachineInstr *P, bool) // end iterator + : PHI(P), idx(PHI->getNumOperands()) {} + + PHI_iterator &operator++() { idx += 2; return *this; } + bool operator==(const PHI_iterator& x) const { return idx == x.idx; } + bool operator!=(const PHI_iterator& x) const { return !operator==(x); } + unsigned getIncomingValue() { return PHI->getOperand(idx).getReg(); } + MachineBasicBlock *getIncomingBlock() { + return PHI->getOperand(idx+1).getMBB(); + } + }; + static inline PHI_iterator PHI_begin(PhiT *PHI) { return PHI_iterator(PHI); } + static inline PHI_iterator PHI_end(PhiT *PHI) { + return PHI_iterator(PHI, true); + } + + /// FindPredecessorBlocks - Put the predecessors of BB into the Preds + /// vector. + static void FindPredecessorBlocks(MachineBasicBlock *BB, + SmallVectorImpl<MachineBasicBlock*> *Preds){ + for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + E = BB->pred_end(); PI != E; ++PI) + Preds->push_back(*PI); + } + + /// GetUndefVal - Create an IMPLICIT_DEF instruction with a new register. + /// Add it into the specified block and return the register. + static unsigned GetUndefVal(MachineBasicBlock *BB, + MachineSSAUpdater *Updater) { + // Insert an implicit_def to represent an undef value. + MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF, + BB, BB->getFirstTerminator(), + Updater->VRC, Updater->MRI, + Updater->TII); + return NewDef->getOperand(0).getReg(); + } + + /// CreateEmptyPHI - Create a PHI instruction that defines a new register. + /// Add it into the specified block and return the register. + static unsigned CreateEmptyPHI(MachineBasicBlock *BB, unsigned NumPreds, + MachineSSAUpdater *Updater) { + MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); + MachineInstr *PHI = InsertNewDef(TargetOpcode::PHI, BB, Loc, + Updater->VRC, Updater->MRI, + Updater->TII); + return PHI->getOperand(0).getReg(); + } + + /// AddPHIOperand - Add the specified value as an operand of the PHI for + /// the specified predecessor block. + static void AddPHIOperand(MachineInstr *PHI, unsigned Val, + MachineBasicBlock *Pred) { + MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred); + } + + /// InstrIsPHI - Check if an instruction is a PHI. + /// + static MachineInstr *InstrIsPHI(MachineInstr *I) { + if (I && I->isPHI()) + return I; + return nullptr; + } + + /// ValueIsPHI - Check if the instruction that defines the specified register + /// is a PHI instruction. + static MachineInstr *ValueIsPHI(unsigned Val, MachineSSAUpdater *Updater) { + return InstrIsPHI(Updater->MRI->getVRegDef(Val)); + } + + /// ValueIsNewPHI - Like ValueIsPHI but also check if the PHI has no source + /// operands, i.e., it was just added. + static MachineInstr *ValueIsNewPHI(unsigned Val, MachineSSAUpdater *Updater) { + MachineInstr *PHI = ValueIsPHI(Val, Updater); + if (PHI && PHI->getNumOperands() <= 1) + return PHI; + return nullptr; + } + + /// GetPHIValue - For the specified PHI instruction, return the register + /// that it defines. + static unsigned GetPHIValue(MachineInstr *PHI) { + return PHI->getOperand(0).getReg(); + } +}; + +} // End llvm namespace + +/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry +/// for the specified BB and if so, return it. 
If not, construct SSA form by +/// first calculating the required placement of PHIs and then inserting new +/// PHIs where needed. +unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){ + AvailableValsTy &AvailableVals = getAvailableVals(AV); + if (unsigned V = AvailableVals[BB]) + return V; + + SSAUpdaterImpl<MachineSSAUpdater> Impl(this, &AvailableVals, InsertedPHIs); + return Impl.GetValue(BB); +} diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp new file mode 100644 index 0000000..bcee15c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -0,0 +1,3413 @@ +//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// MachineScheduler schedules machine instructions after phi elimination. It +// preserves LiveIntervals so it can be invoked before register allocation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "misched" + +namespace llvm { +cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden, + cl::desc("Force top-down list scheduling")); +cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden, + cl::desc("Force bottom-up list scheduling")); +cl::opt<bool> +DumpCriticalPathLength("misched-dcpl", cl::Hidden, + cl::desc("Print critical path length to stdout")); +} + +#ifndef NDEBUG +static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden, + cl::desc("Pop up a window to show MISched dags after they are processed")); + +/// In some situations a few uninteresting nodes depend on nearly all other +/// nodes in the graph, provide a cutoff to hide them. 
+static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden, + cl::desc("Hide nodes with more predecessor/successor than cutoff")); + +static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden, + cl::desc("Stop scheduling after N instructions"), cl::init(~0U)); + +static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden, + cl::desc("Only schedule this function")); +static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden, + cl::desc("Only schedule this MBB#")); +#else +static bool ViewMISchedDAGs = false; +#endif // NDEBUG + +static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden, + cl::desc("Enable register pressure scheduling."), cl::init(true)); + +static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden, + cl::desc("Enable cyclic critical path analysis."), cl::init(true)); + +static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden, + cl::desc("Enable load clustering."), cl::init(true)); + +// Experimental heuristics +static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + +// DAG subtrees must have at least this many nodes. +static const unsigned MinSubtreeSize = 8; + +// Pin the vtables to this file. +void MachineSchedStrategy::anchor() {} +void ScheduleDAGMutation::anchor() {} + +//===----------------------------------------------------------------------===// +// Machine Instruction Scheduling Pass and Registry +//===----------------------------------------------------------------------===// + +MachineSchedContext::MachineSchedContext(): + MF(nullptr), MLI(nullptr), MDT(nullptr), PassConfig(nullptr), AA(nullptr), LIS(nullptr) { + RegClassInfo = new RegisterClassInfo(); +} + +MachineSchedContext::~MachineSchedContext() { + delete RegClassInfo; +} + +namespace { +/// Base class for a machine scheduler class that can run at any point. +class MachineSchedulerBase : public MachineSchedContext, + public MachineFunctionPass { +public: + MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {} + + void print(raw_ostream &O, const Module* = nullptr) const override; + +protected: + void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags); +}; + +/// MachineScheduler runs after coalescing and before register allocation. +class MachineScheduler : public MachineSchedulerBase { +public: + MachineScheduler(); + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction&) override; + + static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); +}; + +/// PostMachineScheduler runs after shortly before code emission. 
+class PostMachineScheduler : public MachineSchedulerBase { +public: + PostMachineScheduler(); + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction&) override; + + static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createPostMachineScheduler(); +}; +} // namespace + +char MachineScheduler::ID = 0; + +char &llvm::MachineSchedulerID = MachineScheduler::ID; + +INITIALIZE_PASS_BEGIN(MachineScheduler, "machine-scheduler", + "Machine Instruction Scheduler", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(MachineScheduler, "machine-scheduler", + "Machine Instruction Scheduler", false, false) + +MachineScheduler::MachineScheduler() +: MachineSchedulerBase(ID) { + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +char PostMachineScheduler::ID = 0; + +char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID; + +INITIALIZE_PASS(PostMachineScheduler, "postmisched", + "PostRA Machine Instruction Scheduler", false, false) + +PostMachineScheduler::PostMachineScheduler() +: MachineSchedulerBase(ID) { + initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry()); +} + +void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequiredID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +MachinePassRegistry MachineSchedRegistry::Registry; + +/// A dummy default scheduler factory indicates whether the scheduler +/// is overridden on the command line. +static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) { + return nullptr; +} + +/// MachineSchedOpt allows command line selection of the scheduler. +static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false, + RegisterPassParser<MachineSchedRegistry> > +MachineSchedOpt("misched", + cl::init(&useDefaultMachineSched), cl::Hidden, + cl::desc("Machine instruction scheduler to use")); + +static MachineSchedRegistry +DefaultSchedRegistry("default", "Use the target's default scheduler choice.", + useDefaultMachineSched); + +static cl::opt<bool> EnableMachineSched( + "enable-misched", + cl::desc("Enable the machine instruction scheduling pass."), cl::init(true), + cl::Hidden); + +/// Forward declare the standard machine scheduler. This will be used as the +/// default scheduler if the target does not set a default. +static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C); + +/// Decrement this iterator until reaching the top or a non-debug instr. 
+static MachineBasicBlock::const_iterator +priorNonDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator Beg) { + assert(I != Beg && "reached the top of the region, cannot decrement"); + while (--I != Beg) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// Non-const version. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator Beg) { + return const_cast<MachineInstr*>( + &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); +} + +/// If this iterator is a debug value, increment until reaching the End or a +/// non-debug instruction. +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { + for(; I != End; ++I) { + if (!I->isDebugValue()) + break; + } + return I; +} + +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast<MachineInstr*>( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. +ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor != useDefaultMachineSched) + return Ctor(this); + + // Get the default scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSchedLive(this); +} + +/// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by +/// the caller. We don't have a command line option to override the postRA +/// scheduler. The Target must configure it. +ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() { + // Get the postRA scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSchedPostRA(this); +} + +/// Top-level MachineScheduler pass driver. +/// +/// Visit blocks in function order. Divide each block into scheduling regions +/// and visit them bottom-up. Visiting regions bottom-up is not required, but is +/// consistent with the DAG builder, which traverses the interior of the +/// scheduling regions bottom-up. +/// +/// This design avoids exposing scheduling boundaries to the DAG builder, +/// simplifying the DAG builder's support for "special" target instructions. +/// At the same time the design allows target schedulers to operate across +/// scheduling boundaries, for example to bundle the boudary instructions +/// without reordering them. This creates complexity, because the target +/// scheduler must update the RegionBegin and RegionEnd positions cached by +/// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler +/// design would be to split blocks at scheduling boundaries, but LLVM has a +/// general bias against block splitting purely for implementation simplicity. 
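+
+// A minimal sketch (illustrative only, not from the upstream file) of how an
+// additional strategy can be made selectable with -misched=..., mirroring the
+// DefaultSchedRegistry registration above; `MyScheduleDAG` is a hypothetical
+// ScheduleDAGInstrs subclass:
+//
+//   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
+//     return new MyScheduleDAG(C);
+//   }
+//   static MachineSchedRegistry
+//   MySchedRegistry("my-sched", "Run MyScheduleDAG.", createMySched);
+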
+bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (EnableMachineSched.getNumOccurrences()) { + if (!EnableMachineSched) + return false; + } else if (!mf.getSubtarget().enableMachineScheduler()) + return false; + + DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs())); + + // Initialize the context of the pass. + MF = &mf; + MLI = &getAnalysis<MachineLoopInfo>(); + MDT = &getAnalysis<MachineDominatorTree>(); + PassConfig = &getAnalysis<TargetPassConfig>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + LIS = &getAnalysis<LiveIntervals>(); + + if (VerifyScheduling) { + DEBUG(LIS->dump()); + MF->verify(this, "Before machine scheduling."); + } + RegClassInfo->runOnMachineFunction(*MF); + + // Instantiate the selected scheduler for this target, function, and + // optimization level. + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler()); + scheduleRegions(*Scheduler, false); + + DEBUG(LIS->dump()); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); + return true; +} + +bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { + if (skipOptnoneFunction(*mf.getFunction())) + return false; + + if (!mf.getSubtarget().enablePostRAScheduler()) { + DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + return false; + } + DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); + + // Initialize the context of the pass. + MF = &mf; + PassConfig = &getAnalysis<TargetPassConfig>(); + + if (VerifyScheduling) + MF->verify(this, "Before post machine scheduling."); + + // Instantiate the selected scheduler for this target, function, and + // optimization level. + std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler()); + scheduleRegions(*Scheduler, true); + + if (VerifyScheduling) + MF->verify(this, "After post machine scheduling."); + return true; +} + +/// Return true of the given instruction should not be included in a scheduling +/// region. +/// +/// MachineScheduler does not currently support scheduling across calls. To +/// handle calls, the DAG builder needs to be modified to create register +/// anti/output dependencies on the registers clobbered by the call's regmask +/// operand. In PreRA scheduling, the stack pointer adjustment already prevents +/// scheduling across calls. In PostRA scheduling, we need the isCall to enforce +/// the boundary, but there would be no benefit to postRA scheduling across +/// calls this late anyway. +static bool isSchedBoundary(MachineBasicBlock::iterator MI, + MachineBasicBlock *MBB, + MachineFunction *MF, + const TargetInstrInfo *TII) { + return MI->isCall() || TII->isSchedulingBoundary(MI, MBB, *MF); +} + +/// Main driver for both MachineScheduler and PostMachineScheduler. +void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler, + bool FixKillFlags) { + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + // Visit all machine basic blocks. + // + // TODO: Visit blocks in global postorder or postorder within the bottom-up + // loop tree. Then we can optionally compute global RegPressure. 
+ for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end(); + MBB != MBBEnd; ++MBB) { + + Scheduler.startBlock(&*MBB); + +#ifndef NDEBUG + if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName()) + continue; + if (SchedOnlyBlock.getNumOccurrences() + && (int)SchedOnlyBlock != MBB->getNumber()) + continue; +#endif + + // Break the block into scheduling regions [I, RegionEnd), and schedule each + // region as soon as it is discovered. RegionEnd points the scheduling + // boundary at the bottom of the region. The DAG does not include RegionEnd, + // but the region does (i.e. the next RegionEnd is above the previous + // RegionBegin). If the current block has no terminator then RegionEnd == + // MBB->end() for the bottom region. + // + // The Scheduler may insert instructions during either schedule() or + // exitRegion(), even for empty regions. So the local iterators 'I' and + // 'RegionEnd' are invalid across these calls. + // + // MBB::size() uses instr_iterator to count. Here we need a bundle to count + // as a single instruction. + unsigned RemainingInstrs = std::distance(MBB->begin(), MBB->end()); + for(MachineBasicBlock::iterator RegionEnd = MBB->end(); + RegionEnd != MBB->begin(); RegionEnd = Scheduler.begin()) { + + // Avoid decrementing RegionEnd for blocks with no terminator. + if (RegionEnd != MBB->end() || + isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) { + --RegionEnd; + // Count the boundary instruction. + --RemainingInstrs; + } + + // The next region starts above the previous region. Look backward in the + // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; + MachineBasicBlock::iterator I = RegionEnd; + for(;I != MBB->begin(); --I, --RemainingInstrs) { + if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII)) + break; + if (!I->isDebugValue()) + ++NumRegionInstrs; + } + // Notify the scheduler of the region, even if we may skip scheduling + // it. Perhaps it still needs to be bundled. + Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs); + + // Skip empty scheduling regions (0 or 1 schedulable instructions). + if (I == RegionEnd || I == std::prev(RegionEnd)) { + // Close the current region. Bundle the terminator if needed. + // This invalidates 'RegionEnd' and 'I'. + Scheduler.exitRegion(); + continue; + } + DEBUG(dbgs() << "********** MI Scheduling **********\n"); + DEBUG(dbgs() << MF->getName() + << ":BB#" << MBB->getNumber() << " " << MBB->getName() + << "\n From: " << *I << " To: "; + if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; + else dbgs() << "End"; + dbgs() << " RegionInstrs: " << NumRegionInstrs + << " Remaining: " << RemainingInstrs << "\n"); + if (DumpCriticalPathLength) { + errs() << MF->getName(); + errs() << ":BB# " << MBB->getNumber(); + errs() << " " << MBB->getName() << " \n"; + } + + // Schedule a region: possibly reorder instructions. + // This invalidates 'RegionEnd' and 'I'. + Scheduler.schedule(); + + // Close the current region. + Scheduler.exitRegion(); + + // Scheduling has invalidated the current iterator 'I'. Ask the + // scheduler for the top of it's scheduled region. + RegionEnd = Scheduler.begin(); + } + assert(RemainingInstrs == 0 && "Instruction count mismatch!"); + Scheduler.finishBlock(); + // FIXME: Ideally, no further passes should rely on kill flags. However, + // thumb2 size reduction is currently an exception, so the PostMIScheduler + // needs to do this. 
+ if (FixKillFlags) + Scheduler.fixupKills(&*MBB); + } + Scheduler.finalizeSchedule(); +} + +void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { + // unimplemented +} + +LLVM_DUMP_METHOD +void ReadyQueue::dump() { + dbgs() << "Queue " << Name << ": "; + for (unsigned i = 0, e = Queue.size(); i < e; ++i) + dbgs() << Queue[i]->NodeNum << " "; + dbgs() << "\n"; +} + +//===----------------------------------------------------------------------===// +// ScheduleDAGMI - Basic machine instruction scheduling. This is +// independent of PreRA/PostRA scheduling and involves no extra book-keeping for +// virtual registers. +// ===----------------------------------------------------------------------===/ + +// Provide a vtable anchor. +ScheduleDAGMI::~ScheduleDAGMI() { +} + +bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) { + return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU); +} + +bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { + if (SuccSU != &ExitSU) { + // Do not use WillCreateCycle, it assumes SD scheduling. + // If Pred is reachable from Succ, then the edge creates a cycle. + if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) + return false; + Topo.AddPred(SuccSU, PredDep.getSUnit()); + } + SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); + // Return true regardless of whether a new edge needed to be inserted. + return true; +} + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When +/// NumPredsLeft reaches zero, release the successor node. +/// +/// FIXME: Adjust SuccSU height based on MinLatency. +void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + if (SuccEdge->isCluster()) + NextClusterSucc = SuccSU; + return; + } +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) + SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); + + --SuccSU->NumPredsLeft; + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + SchedImpl->releaseTopNode(SuccSU); +} + +/// releaseSuccessors - Call releaseSucc on each of SU's successors. +void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + releaseSucc(SU, &*I); + } +} + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When +/// NumSuccsLeft reaches zero, release the predecessor node. +/// +/// FIXME: Adjust PredSU height based on MinLatency. +void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + + if (PredEdge->isWeak()) { + --PredSU->WeakSuccsLeft; + if (PredEdge->isCluster()) + NextClusterPred = PredSU; + return; + } +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. 
+ if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) + PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); + + --PredSU->NumSuccsLeft; + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) + SchedImpl->releaseBottomNode(PredSU); +} + +/// releasePredecessors - Call releasePred on each of SU's predecessors. +void ScheduleDAGMI::releasePredecessors(SUnit *SU) { + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + releasePred(SU, &*I); + } +} + +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) +{ + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + + SchedImpl->initPolicy(begin, end, regioninstrs); +} + +/// This is normally called from the main scheduler loop but may also be invoked +/// by the scheduling strategy to perform additional code motion. +void ScheduleDAGMI::moveInstruction( + MachineInstr *MI, MachineBasicBlock::iterator InsertPos) { + // Advance RegionBegin if the first instruction moves down. + if (&*RegionBegin == MI) + ++RegionBegin; + + // Update the instruction stream. + BB->splice(InsertPos, BB, MI); + + // Update LiveIntervals + if (LIS) + LIS->handleMove(MI, /*UpdateFlags=*/true); + + // Recede RegionBegin if an instruction moves above the first. + if (RegionBegin == InsertPos) + RegionBegin = MI; +} + +bool ScheduleDAGMI::checkSchedLimit() { +#ifndef NDEBUG + if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) { + CurrentTop = CurrentBottom; + return false; + } + ++NumInstrsScheduled; +#endif + return true; +} + +/// Per-region scheduling driver, called back from +/// MachineScheduler::runOnMachineFunction. This is a simplified driver that +/// does not consider liveness or register pressure. It is useful for PostRA +/// scheduling and potentially other custom schedulers. +void ScheduleDAGMI::schedule() { + DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + + // Build the DAG. + buildSchedGraph(AA); + + Topo.InitDAGTopologicalSorting(); + + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + if (ViewMISchedDAGs) viewGraph(); + + // Initialize ready queues now that the DAG and priority data are finalized. 
+ initQueues(TopRoots, BotRoots); + + bool IsTopNode = false; + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + + assert(!SU->isScheduled && "Node already scheduled"); + if (!checkSchedLimit()) + break; + + MachineInstr *MI = SU->getInstr(); + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else + moveInstruction(MI, CurrentTop); + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, priorII); + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + } + // Notify the scheduling strategy before updating the DAG. + // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues + // runs, it can then use the accurate ReadyCycle time to determine whether + // newly released nodes can move to the readyQ. + SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); + } + assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + + placeDebugValues(); + + DEBUG({ + unsigned BBNum = begin()->getParent()->getNumber(); + dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +/// Apply each ScheduleDAGMutation step in order. +void ScheduleDAGMI::postprocessDAG() { + for (unsigned i = 0, e = Mutations.size(); i < e; ++i) { + Mutations[i]->apply(this); + } +} + +void ScheduleDAGMI:: +findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, + SmallVectorImpl<SUnit*> &BotRoots) { + for (std::vector<SUnit>::iterator + I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + + // A SUnit is ready to top schedule if it has no predecessors. + if (!I->NumPredsLeft) + TopRoots.push_back(SU); + // A SUnit is ready to bottom schedule if it has no successors. + if (!I->NumSuccsLeft) + BotRoots.push_back(SU); + } + ExitSU.biasCriticalPath(); +} + +/// Identify DAG roots and setup scheduler queues. +void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots, + ArrayRef<SUnit*> BotRoots) { + NextClusterSucc = nullptr; + NextClusterPred = nullptr; + + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + // + // Nodes with unreleased weak edges can still be roots. + // Release top roots in forward order. + for (SmallVectorImpl<SUnit*>::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl<SUnit*>::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } + + releaseSuccessors(&EntrySU); + releasePredecessors(&ExitSU); + + SchedImpl->registerRoots(); + + // Advance past initial DebugValues. + CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + CurrentBottom = RegionEnd; +} + +/// Update scheduler queues after scheduling an instruction. 
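+
+// A minimal sketch (illustrative only, not from the upstream file) of a
+// ScheduleDAGMutation applied through postprocessDAG above, built on the
+// canAddEdge/addEdge helpers defined earlier; the mutation class and
+// `ClusterPairs` are hypothetical:
+//
+//   struct MyClusterMutation : ScheduleDAGMutation {
+//     void apply(ScheduleDAGMI *DAG) override {
+//       for (auto &P : ClusterPairs)              // pairs of (pred, succ)
+//         if (DAG->canAddEdge(P.second, P.first)) // would not create a cycle
+//           DAG->addEdge(P.second, SDep(P.first, SDep::Cluster));
+//     }
+//   };
+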
+void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { + // Release dependent instructions for scheduling. + if (IsTopNode) + releaseSuccessors(SU); + else + releasePredecessors(SU); + + SU->isScheduled = true; +} + +/// Reinsert any remaining debug_values, just like the PostRA scheduler. +void ScheduleDAGMI::placeDebugValues() { + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) { + BB->splice(RegionBegin, BB, FirstDbgValue); + RegionBegin = FirstDbgValue; + } + + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; + BB->splice(++OrigPrevMI, BB, DbgValue); + if (OrigPrevMI == std::prev(RegionEnd)) + RegionEnd = DbgValue; + } + DbgValues.clear(); + FirstDbgValue = nullptr; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGMI::dumpSchedule() const { + for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { + if (SUnit *SU = getSUnit(&(*MI))) + SU->dump(this); + else + dbgs() << "Missing SUnit\n"; + } +} +#endif + +//===----------------------------------------------------------------------===// +// ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals +// preservation. +//===----------------------------------------------------------------------===// + +ScheduleDAGMILive::~ScheduleDAGMILive() { + delete DFSResult; +} + +/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after +/// crossing a scheduling boundary. [begin, end) includes all instructions in +/// the region, including the boundary itself and single-instruction regions +/// that don't get scheduled. +void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) +{ + // ScheduleDAGMI initializes SchedImpl's per-region policy. + ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs); + + // For convenience remember the end of the liveness region. + LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd); + + SUPressureDiffs.clear(); + + ShouldTrackPressure = SchedImpl->shouldTrackPressure(); +} + +// Setup the register pressure trackers for the top scheduled top and bottom +// scheduled regions. +void ScheduleDAGMILive::initRegPressure() { + TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin); + BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + + // Close the RPTracker to finalize live ins. + RPTracker.closeRegion(); + + DEBUG(RPTracker.dump()); + + // Initialize the live ins and live outs. + TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); + BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs); + + // Close one end of the tracker so we can call + // getMaxUpward/DownwardPressureDelta before advancing across any + // instructions. This converts currently live regs into live ins/outs. 
+ TopRPTracker.closeTop(); + BotRPTracker.closeBottom(); + + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + + // For each live out vreg reduce the pressure change associated with other + // uses of the same vreg below the live-out reaching def. + updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); + + // Account for liveness generated by the region boundary. + if (LiveRegionEnd != RegionEnd) { + SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + updatePressureDiffs(LiveUses); + } + + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + + assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); + + // Cache the list of excess pressure sets in this region. This will also track + // the max pressure in the scheduled code for these sets. + RegionCriticalPSets.clear(); + const std::vector<unsigned> &RegionPressure = + RPTracker.getPressure().MaxSetPressure; + for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); + RegionCriticalPSets.push_back(PressureChange(i)); + } + } + DEBUG(dbgs() << "Excess PSets: "; + for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) + dbgs() << TRI->getRegPressureSetName( + RegionCriticalPSets[i].getPSet()) << " "; + dbgs() << "\n"); +} + +void ScheduleDAGMILive:: +updateScheduledPressure(const SUnit *SU, + const std::vector<unsigned> &NewMaxPressure) { + const PressureDiff &PDiff = getPressureDiff(SU); + unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); + for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); + I != E; ++I) { + if (!I->isValid()) + break; + unsigned ID = I->getPSet(); + while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) + ++CritIdx; + if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { + if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() + && NewMaxPressure[ID] <= INT16_MAX) + RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); + } + unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); + if (NewMaxPressure[ID] >= Limit - 2) { + DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] + << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ") << Limit + << "(+ " << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + } + } +} + +/// Update the PressureDiff array for liveness after scheduling this +/// instruction. +void ScheduleDAGMILive::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { + for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { + /// FIXME: Currently assuming single-use physregs. + unsigned Reg = LiveUses[LUIdx]; + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + if (!TRI->isVirtualRegister(Reg)) + continue; + + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. 
We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + // If this use comes before the reaching def, it cannot be a last use, so + // descrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) { + PressureDiff &PDiff = getPressureDiff(SU); + PDiff.addPressureChange(Reg, true, &MRI); + DEBUG( + dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr(); + dbgs() << " to "; + PDiff.dump(*TRI); + ); + } + } + } + } +} + +/// schedule - Called back from MachineScheduler::runOnMachineFunction +/// after setting up the current scheduling region. [RegionBegin, RegionEnd) +/// only includes instructions that have DAG nodes, not scheduling boundaries. +/// +/// This is a skeletal driver, with all the functionality pushed into helpers, +/// so that it can be easily extended by experimental schedulers. Generally, +/// implementing MachineSchedStrategy should be sufficient to implement a new +/// scheduling algorithm. However, if a scheduler further subclasses +/// ScheduleDAGMILive then it will want to override this virtual method in order +/// to update any specialized state. +void ScheduleDAGMILive::schedule() { + DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n"); + DEBUG(SchedImpl->dumpPolicy()); + buildDAGWithRegPressure(); + + Topo.InitDAGTopologicalSorting(); + + postprocessDAG(); + + SmallVector<SUnit*, 8> TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + if (ShouldTrackPressure) { + dbgs() << " Pressure Diff : "; + getPressureDiff(&SU).dump(*TRI); + } + dbgs() << '\n'; + } + ); + if (ViewMISchedDAGs) viewGraph(); + + // Initialize ready queues now that the DAG and priority data are finalized. + initQueues(TopRoots, BotRoots); + + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } + + bool IsTopNode = false; + while (true) { + DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n"); + SUnit *SU = SchedImpl->pickNode(IsTopNode); + if (!SU) break; + + assert(!SU->isScheduled && "Node already scheduled"); + if (!checkSchedLimit()) + break; + + scheduleMI(SU, IsTopNode); + + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + + // Notify the scheduling strategy after updating the DAG. 
+ SchedImpl->schedNode(SU, IsTopNode);
+
+ updateQueues(SU, IsTopNode);
+ }
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
+
+/// Build the DAG and set up three register pressure trackers.
+void ScheduleDAGMILive::buildDAGWithRegPressure() {
+ if (!ShouldTrackPressure) {
+ RPTracker.reset();
+ RegionCriticalPSets.clear();
+ buildSchedGraph(AA);
+ return;
+ }
+
+ // Initialize the register pressure tracker used by buildSchedGraph.
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
+ /*TrackUntiedDefs=*/true);
+
+ // Account for liveness generated by the region boundary.
+ if (LiveRegionEnd != RegionEnd)
+ RPTracker.recede();
+
+ // Build the DAG, and compute current register pressure.
+ buildSchedGraph(AA, &RPTracker, &SUPressureDiffs);
+
+ // Initialize top/bottom trackers after computing region pressure.
+ initRegPressure();
+}
+
+void ScheduleDAGMILive::computeDFSResult() {
+ if (!DFSResult)
+ DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
+ DFSResult->clear();
+ ScheduledTrees.clear();
+ DFSResult->resize(SUnits.size());
+ DFSResult->compute(SUnits);
+ ScheduledTrees.resize(DFSResult->getNumSubtrees());
+}
+
+/// Compute the max cyclic critical path through the DAG. The scheduling DAG
+/// only provides the critical path for single block loops. To handle loops that
+/// span blocks, we could use the vreg path latencies provided by
+/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
+/// available for use in the scheduler.
+///
+/// The cyclic path estimation identifies a def-use pair that crosses the back
+/// edge and considers the depth and height of the nodes. For example, consider
+/// the following instruction sequence where each instruction has unit latency
+/// and defines an eponymous virtual register:
+///
+/// a->b(a,c)->c(b)->d(c)->exit
+///
+/// The cyclic critical path is two cycles: b->c->b
+/// The acyclic critical path is four cycles: a->b->c->d->exit
+/// LiveOutHeight = height(c) = len(c->d->exit) = 2
+/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
+/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
+/// LiveInDepth = depth(b) = len(a->b) = 1
+///
+/// LiveOutDepth - LiveInDepth = 3 - 1 = 2
+/// LiveInHeight - LiveOutHeight = 4 - 2 = 2
+/// CyclicCriticalPath = min(2, 2) = 2
+///
+/// This could be relevant to PostRA scheduling, but is currently implemented
+/// assuming LiveIntervals.
+unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
+ // This only applies to single block loops.
+ if (!BB->isSuccessor(BB))
+ return 0;
+
+ unsigned MaxCyclicLatency = 0;
+ // Visit each live out vreg def to find def/use pairs that cross iterations.
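+ // With the unit-latency example above, the back-edge pair (c defined below,
+ // used by b on the next iteration) yields
+ // CyclicLatency = min(LiveOutDepth - LiveInDepth,
+ //                     LiveInHeight - LiveOutHeight) = min(2, 2) = 2.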
+ ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; + for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + if (!TRI->isVirtualRegister(Reg)) + continue; + const LiveInterval &LI = LIS->getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + if (!DefVNI) + continue; + + MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); + const SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + + unsigned LiveOutHeight = DefSU->getHeight(); + unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; + // Visit all local users of the vreg def. + for (const VReg2SUnit &V2SU + : make_range(VRegUses.find(Reg), VRegUses.end())) { + SUnit *SU = V2SU.SU; + if (SU == &ExitSU) + continue; + + // Only consider uses of the phi. + LiveQueryResult LRQ = + LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (!LRQ.valueIn()->isPHIDef()) + continue; + + // Assume that a path spanning two iterations is a cycle, which could + // overestimate in strange cases. This allows cyclic latency to be + // estimated as the minimum slack of the vreg's depth or height. + unsigned CyclicLatency = 0; + if (LiveOutDepth > SU->getDepth()) + CyclicLatency = LiveOutDepth - SU->getDepth(); + + unsigned LiveInHeight = SU->getHeight() + DefSU->Latency; + if (LiveInHeight > LiveOutHeight) { + if (LiveInHeight - LiveOutHeight < CyclicLatency) + CyclicLatency = LiveInHeight - LiveOutHeight; + } + else + CyclicLatency = 0; + + DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << SU->NodeNum << ") = " << CyclicLatency << "c\n"); + if (CyclicLatency > MaxCyclicLatency) + MaxCyclicLatency = CyclicLatency; + } + } + DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + return MaxCyclicLatency; +} + +/// Move an instruction and update register pressure. +void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) { + // Move the instruction to its new location in the instruction stream. + MachineInstr *MI = SU->getInstr(); + + if (IsTopNode) { + assert(SU->isTopReady() && "node still has unscheduled dependencies"); + if (&*CurrentTop == MI) + CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom); + else { + moveInstruction(MI, CurrentTop); + TopRPTracker.setPos(MI); + } + + if (ShouldTrackPressure) { + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + DEBUG( + dbgs() << "Top Pressure:\n"; + dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI); + ); + + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + } + } + else { + assert(SU->isBottomReady() && "node still has unscheduled dependencies"); + MachineBasicBlock::iterator priorII = + priorNonDebug(CurrentBottom, CurrentTop); + if (&*priorII == MI) + CurrentBottom = priorII; + else { + if (&*CurrentTop == MI) { + CurrentTop = nextIfDebug(++CurrentTop, priorII); + TopRPTracker.setPos(CurrentTop); + } + moveInstruction(MI, CurrentBottom); + CurrentBottom = MI; + } + if (ShouldTrackPressure) { + // Update bottom scheduled pressure. 
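+ // Receding the bottom tracker mirrors the advance() on the top-down path
+ // above and may expose new live uses whose PressureDiffs need refreshing.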
+ SmallVector<unsigned, 8> LiveUses; + BotRPTracker.recede(&LiveUses); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + DEBUG( + dbgs() << "Bottom Pressure:\n"; + dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI); + ); + + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); + updatePressureDiffs(LiveUses); + } + } +} + +//===----------------------------------------------------------------------===// +// LoadClusterMutation - DAG post-processing to cluster loads. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between neighboring +/// loads. +class LoadClusterMutation : public ScheduleDAGMutation { + struct LoadInfo { + SUnit *SU; + unsigned BaseReg; + unsigned Offset; + LoadInfo(SUnit *su, unsigned reg, unsigned ofs) + : SU(su), BaseReg(reg), Offset(ofs) {} + + bool operator<(const LoadInfo &RHS) const { + return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset); + } + }; + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; +public: + LoadClusterMutation(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) + : TII(tii), TRI(tri) {} + + void apply(ScheduleDAGMI *DAG) override; +protected: + void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG); +}; +} // anonymous + +void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads, + ScheduleDAGMI *DAG) { + SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords; + for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { + SUnit *SU = Loads[Idx]; + unsigned BaseReg; + unsigned Offset; + if (TII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) + LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); + } + if (LoadRecords.size() < 2) + return; + std::sort(LoadRecords.begin(), LoadRecords.end()); + unsigned ClusterLength = 1; + for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { + if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { + ClusterLength = 1; + continue; + } + + SUnit *SUa = LoadRecords[Idx].SU; + SUnit *SUb = LoadRecords[Idx+1].SU; + if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) + && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { + + DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since nearby + // loads should have effectively the same inputs. + for (SUnit::const_succ_iterator + SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { + if (SI->getSUnit() == SUb) + continue; + DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); + DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + } + ++ClusterLength; + } + else + ClusterLength = 1; + } +} + +/// \brief Callback from DAG postProcessing to create cluster edges for loads. +void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { + // Map DAG NodeNum to store chain ID. + DenseMap<unsigned, unsigned> StoreChainIDs; + // Map each store chain to a set of dependent loads. 
+ SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents; + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->mayLoad()) + continue; + unsigned ChainPredID = DAG->SUnits.size(); + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->isCtrl()) { + ChainPredID = PI->getSUnit()->NodeNum; + break; + } + } + // Check if this chain-like pred has been seen + // before. ChainPredID==MaxNodeID for loads at the top of the schedule. + unsigned NumChains = StoreChainDependents.size(); + std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result = + StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); + if (Result.second) + StoreChainDependents.resize(NumChains + 1); + StoreChainDependents[Result.first->second].push_back(SU); + } + // Iterate over the store chains. + for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) + clusterNeighboringLoads(StoreChainDependents[Idx], DAG); +} + +//===----------------------------------------------------------------------===// +// MacroFusion - DAG post-processing to encourage fusion of macro ops. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between instructions +/// that may be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; +public: + MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) + : TII(TII), TRI(TRI) {} + + void apply(ScheduleDAGMI *DAG) override; +}; +} // anonymous + +/// Returns true if \p MI reads a register written by \p Other. +static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI, + const MachineInstr &Other) { + for (const MachineOperand &MO : MI.uses()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + + unsigned Reg = MO.getReg(); + if (Other.modifiesRegister(Reg, &TRI)) + return true; + } + return false; +} + +/// \brief Callback from DAG postProcessing to create cluster edges to encourage +/// fused operations. +void MacroFusion::apply(ScheduleDAGMI *DAG) { + // For now, assume targets can only fuse with the branch. + SUnit &ExitSU = DAG->ExitSU; + MachineInstr *Branch = ExitSU.getInstr(); + if (!Branch) + return; + + for (SUnit &SU : DAG->SUnits) { + // SUnits with successors can't be schedule in front of the ExitSU. + if (!SU.Succs.empty()) + continue; + // We only care if the node writes to a register that the branch reads. + MachineInstr *Pred = SU.getInstr(); + if (!HasDataDep(TRI, *Branch, *Pred)) + continue; + + if (!TII.shouldScheduleAdjacent(Pred, Branch)) + continue; + + // Create a single weak edge from SU to ExitSU. The only effect is to cause + // bottom-up scheduling to heavily prioritize the clustered SU. There is no + // need to copy predecessor edges from ExitSU to SU, since top-down + // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling + // of SU, we could create an artificial edge from the deepest root, but it + // hasn't been needed yet. 
+ bool Success = DAG->addEdge(&ExitSU, SDep(&SU, SDep::Cluster)); + (void)Success; + assert(Success && "No DAG nodes should be reachable from ExitSU"); + + DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n"); + break; + } +} + +//===----------------------------------------------------------------------===// +// CopyConstrain - DAG post-processing to encourage copy elimination. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create weak edges from all uses of a copy to +/// the one use that defines the copy's source vreg, most likely an induction +/// variable increment. +class CopyConstrain : public ScheduleDAGMutation { + // Transient state. + SlotIndex RegionBeginIdx; + // RegionEndIdx is the slot index of the last non-debug instruction in the + // scheduling region. So we may have RegionBeginIdx == RegionEndIdx. + SlotIndex RegionEndIdx; +public: + CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {} + + void apply(ScheduleDAGMI *DAG) override; + +protected: + void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG); +}; +} // anonymous + +/// constrainLocalCopy handles two possibilities: +/// 1) Local src: +/// I0: = dst +/// I1: src = ... +/// I2: = dst +/// I3: dst = src (copy) +/// (create pred->succ edges I0->I1, I2->I1) +/// +/// 2) Local copy: +/// I0: dst = src (copy) +/// I1: = dst +/// I2: src = ... +/// I3: = dst +/// (create pred->succ edges I1->I2, I3->I2) +/// +/// Although the MachineScheduler is currently constrained to single blocks, +/// this algorithm should handle extended blocks. An EBB is a set of +/// contiguously numbered blocks such that the previous block in the EBB is +/// always the single predecessor. +void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) { + LiveIntervals *LIS = DAG->getLIS(); + MachineInstr *Copy = CopySU->getInstr(); + + // Check for pure vreg copies. + unsigned SrcReg = Copy->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return; + + unsigned DstReg = Copy->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return; + + // Check if either the dest or source is local. If it's live across a back + // edge, it's not local. Note that if both vregs are live across the back + // edge, we cannot successfully contrain the copy without cyclic scheduling. + // If both the copy's source and dest are local live intervals, then we + // should treat the dest as the global for the purpose of adding + // constraints. This adds edges from source's other uses to the copy. + unsigned LocalReg = SrcReg; + unsigned GlobalReg = DstReg; + LiveInterval *LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) { + LocalReg = DstReg; + GlobalReg = SrcReg; + LocalLI = &LIS->getInterval(LocalReg); + if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) + return; + } + LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg); + + // Find the global segment after the start of the local LI. + LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex()); + // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a + // local live range. We could create edges from other global uses to the local + // start, but the coalescer should have already eliminated these cases, so + // don't bother dealing with it. 
+ if (GlobalSegment == GlobalLI->end()) + return; + + // If GlobalSegment is killed at the LocalLI->start, the call to find() + // returned the next global segment. But if GlobalSegment overlaps with + // LocalLI->start, then advance to the next segement. If a hole in GlobalLI + // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole. + if (GlobalSegment->contains(LocalLI->beginIndex())) + ++GlobalSegment; + + if (GlobalSegment == GlobalLI->end()) + return; + + // Check if GlobalLI contains a hole in the vicinity of LocalLI. + if (GlobalSegment != GlobalLI->begin()) { + // Two address defs have no hole. + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end, + GlobalSegment->start)) { + return; + } + // If the prior global segment may be defined by the same two-address + // instruction that also defines LocalLI, then can't make a hole here. + if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start, + LocalLI->beginIndex())) { + return; + } + // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise + // it would be a disconnected component in the live range. + assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() && + "Disconnected LRG within the scheduling region."); + } + MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start); + if (!GlobalDef) + return; + + SUnit *GlobalSU = DAG->getSUnit(GlobalDef); + if (!GlobalSU) + return; + + // GlobalDef is the bottom of the GlobalLI hole. Open the hole by + // constraining the uses of the last local def to precede GlobalDef. + SmallVector<SUnit*,8> LocalUses; + const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex()); + MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def); + SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef); + for (SUnit::const_succ_iterator + I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || I->getReg() != LocalReg) + continue; + if (I->getSUnit() == GlobalSU) + continue; + if (!DAG->canAddEdge(GlobalSU, I->getSUnit())) + return; + LocalUses.push_back(I->getSUnit()); + } + // Open the top of the GlobalLI hole by constraining any earlier global uses + // to precede the start of LocalLI. + SmallVector<SUnit*,8> GlobalUses; + MachineInstr *FirstLocalDef = + LIS->getInstructionFromIndex(LocalLI->beginIndex()); + SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef); + for (SUnit::const_pred_iterator + I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) { + if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg) + continue; + if (I->getSUnit() == FirstLocalSU) + continue; + if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit())) + return; + GlobalUses.push_back(I->getSUnit()); + } + DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n"); + // Add the weak edges. + for (SmallVectorImpl<SUnit*>::const_iterator + I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Local use SU(" << (*I)->NodeNum << ") -> SU(" + << GlobalSU->NodeNum << ")\n"); + DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak)); + } + for (SmallVectorImpl<SUnit*>::const_iterator + I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) { + DEBUG(dbgs() << " Global use SU(" << (*I)->NodeNum << ") -> SU(" + << FirstLocalSU->NodeNum << ")\n"); + DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak)); + } +} + +/// \brief Callback from DAG postProcessing to create weak edges to encourage +/// copy elimination. 
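+/// The edges added are SDep::Weak, so they only bias the scheduler's ordering
+/// and never introduce a hard dependence.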
+void CopyConstrain::apply(ScheduleDAGMI *DAG) { + assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals"); + + MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end()); + if (FirstPos == DAG->end()) + return; + RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos); + RegionEndIdx = DAG->getLIS()->getInstructionIndex( + &*priorNonDebug(DAG->end(), DAG->begin())); + + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->isCopy()) + continue; + + constrainLocalCopy(SU, static_cast<ScheduleDAGMILive*>(DAG)); + } +} + +//===----------------------------------------------------------------------===// +// MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler +// and possibly other custom schedulers. +//===----------------------------------------------------------------------===// + +static const unsigned InvalidCycle = ~0U; + +SchedBoundary::~SchedBoundary() { delete HazardRec; } + +void SchedBoundary::reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = nullptr; + } + Available.clear(); + Pending.clear(); + CheckPending = false; + NextSUs.clear(); + CurrCycle = 0; + CurrMOps = 0; + MinReadyCycle = UINT_MAX; + ExpectedLatency = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; + IsResourceLimited = false; + ReservedCycles.clear(); +#ifndef NDEBUG + // Track the maximum number of stall cycles that could arise either from the + // latency of a DAG edge or the number of cycles that a processor resource is + // reserved (SchedBoundary::ReservedCycles). + MaxObservedStall = 0; +#endif + // Reserve a zero-count for invalid CritResIdx. + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); +} + +void SchedRemainder:: +init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { + reset(); + if (!SchedModel->hasInstrSchedModel()) + return; + RemainingCounts.resize(SchedModel->getNumProcResourceKinds()); + for (std::vector<SUnit>::iterator + I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { + const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + unsigned Factor = SchedModel->getResourceFactor(PIdx); + RemainingCounts[PIdx] += (Factor * PI->Cycles); + } + } +} + +void SchedBoundary:: +init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { + reset(); + DAG = dag; + SchedModel = smodel; + Rem = rem; + if (SchedModel->hasInstrSchedModel()) { + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); + ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle); + } +} + +/// Compute the stall cycles based on this SUnit's ready time. Heuristics treat +/// these "soft stalls" differently than the hard stall cycles based on CPU +/// resources and computed by checkHazard(). A fully in-order model +/// (MicroOpBufferSize==0) will not make use of this since instructions are not +/// available for scheduling until they are ready. 
However, a weaker in-order +/// model may use this for heuristics. For example, if a processor has in-order +/// behavior when reading certain resources, this may come into play. +unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) { + if (!SU->isUnbuffered) + return 0; + + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + if (ReadyCycle > CurrCycle) + return ReadyCycle - CurrCycle; + return 0; +} + +/// Compute the next cycle at which the given processor resource can be +/// scheduled. +unsigned SchedBoundary:: +getNextResourceCycle(unsigned PIdx, unsigned Cycles) { + unsigned NextUnreserved = ReservedCycles[PIdx]; + // If this resource has never been used, always return cycle zero. + if (NextUnreserved == InvalidCycle) + return 0; + // For bottom-up scheduling add the cycles needed for the current operation. + if (!isTop()) + NextUnreserved += Cycles; + return NextUnreserved; +} + +/// Does this SU have a hazard within the current instruction group. +/// +/// The scheduler supports two modes of hazard recognition. The first is the +/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that +/// supports highly complicated in-order reservation tables +/// (ScoreboardHazardRecognizer) and arbitraty target-specific logic. +/// +/// The second is a streamlined mechanism that checks for hazards based on +/// simple counters that the scheduler itself maintains. It explicitly checks +/// for instruction dispatch limitations, including the number of micro-ops that +/// can dispatch per cycle. +/// +/// TODO: Also check whether the SU must start a new group. +bool SchedBoundary::checkHazard(SUnit *SU) { + if (HazardRec->isEnabled() + && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) { + return true; + } + unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" + << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); + return true; + } + if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) { + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); +#endif + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(PI->ProcResourceIdx) + << "=" << NRCycle << "c\n"); + return true; + } + } + } + return false; +} + +// Find the unscheduled node in ReadySUs with the highest latency. +unsigned SchedBoundary:: +findMaxLatency(ArrayRef<SUnit*> ReadySUs) { + SUnit *LateSU = nullptr; + unsigned RemLatency = 0; + for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) { + RemLatency = L; + LateSU = *I; + } + } + if (LateSU) { + DEBUG(dbgs() << Available.getName() << " RemLatency SU(" + << LateSU->NodeNum << ") " << RemLatency << "c\n"); + } + return RemLatency; +} + +// Count resources in this zone and the remaining unscheduled +// instruction. Return the max count, scaled. Set OtherCritIdx to the critical +// resource index, or zero if the zone is issue limited. 
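+// Counts are kept in scaled "resource units" (count * resource factor) so that
+// micro-op issue and the various processor resources compare directly.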
+unsigned SchedBoundary:: +getOtherResourceCount(unsigned &OtherCritIdx) { + OtherCritIdx = 0; + if (!SchedModel->hasInstrSchedModel()) + return 0; + + unsigned OtherCritCount = Rem->RemIssueCount + + (RetiredMOps * SchedModel->getMicroOpFactor()); + DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; + if (OtherCount > OtherCritCount) { + OtherCritCount = OtherCount; + OtherCritIdx = PIdx; + } + } + if (OtherCritIdx) { + DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << SchedModel->getResourceName(OtherCritIdx) << "\n"); + } + return OtherCritCount; +} + +void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + +#ifndef NDEBUG + // ReadyCycle was been bumped up to the CurrCycle when this node was + // scheduled, but CurrCycle may have been eagerly advanced immediately after + // scheduling, so may now be greater than ReadyCycle. + if (ReadyCycle > CurrCycle) + MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); +#endif + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + // Check for interlocks first. For the purpose of other heuristics, an + // instruction that cannot issue appears as if it's not in the ReadyQueue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) + Pending.push(SU); + else + Available.push(SU); + + // Record this node as an immediate dependent of the scheduled node. + NextSUs.insert(SU); +} + +void SchedBoundary::releaseTopNode(SUnit *SU) { + if (SU->isScheduled) + return; + + releaseNode(SU, SU->TopReadyCycle); +} + +void SchedBoundary::releaseBottomNode(SUnit *SU) { + if (SU->isScheduled) + return; + + releaseNode(SU, SU->BotReadyCycle); +} + +/// Move the boundary of scheduled code by one cycle. +void SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); + + if (!HazardRec->isEnabled()) { + // Bypass HazardRec virtual calls. + CurrCycle = NextCycle; + } + else { + // Bypass getHazardType calls in case of long latency. 
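+ // Step the recognizer one cycle at a time (AdvanceCycle for top-down,
+ // RecedeCycle for bottom-up) so its scoreboard stays in sync with CurrCycle.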
+ for (; CurrCycle != NextCycle; ++CurrCycle) { + if (isTop()) + HazardRec->AdvanceCycle(); + else + HazardRec->RecedeCycle(); + } + } + CheckPending = true; + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + + DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +} + +void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) { + ExecutedResCounts[PIdx] += Count; + if (ExecutedResCounts[PIdx] > MaxExecutedResCount) + MaxExecutedResCount = ExecutedResCounts[PIdx]; +} + +/// Add the given processor resource to this scheduled zone. +/// +/// \param Cycles indicates the number of consecutive (non-pipelined) cycles +/// during which this resource is consumed. +/// +/// \return the next cycle at which the instruction may execute without +/// oversubscribing resources. +unsigned SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { + unsigned Factor = SchedModel->getResourceFactor(PIdx); + unsigned Count = Factor * Cycles; + DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) + << " +" << Cycles << "x" << Factor << "u\n"); + + // Update Executed resources counts. + incExecutedResources(PIdx, Count); + assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); + Rem->RemainingCounts[PIdx] -= Count; + + // Check if this resource exceeds the current critical resource. If so, it + // becomes the critical resource. + if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { + ZoneCritResIdx = PIdx; + DEBUG(dbgs() << " *** Critical resource " + << SchedModel->getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); + } + // For reserved resources, record the highest cycle using the resource. + unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles); + if (NextAvailable > CurrCycle) { + DEBUG(dbgs() << " Resource conflict: " + << SchedModel->getProcResource(PIdx)->Name << " reserved until @" + << NextAvailable << "\n"); + } + return NextAvailable; +} + +/// Move the boundary of scheduled code by one SUnit. +void SchedBoundary::bumpNode(SUnit *SU) { + // Update the reservation table. + if (HazardRec->isEnabled()) { + if (!isTop() && SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + HazardRec->EmitInstruction(SU); + } + // checkHazard should prevent scheduling multiple instructions per cycle that + // exceed the issue width. + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); + assert( + (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) && + "Cannot schedule this instruction's MicroOps in the current cycle."); + + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + unsigned NextCycle = CurrCycle; + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". We do loosely model in-order resource + // latency. 
If this instruction uses an in-order resource, account for any + // likely stall cycles. + if (SU->isUnbuffered && ReadyCycle > NextCycle) + NextCycle = ReadyCycle; + break; + } + RetiredMOps += IncMOps; + + // Update resource counts and critical resource. + if (SchedModel->hasInstrSchedModel()) { + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; + } + if (SU->hasReservedResource) { + // For reserved resources, record the highest cycle using the resource. + // For top-down scheduling, this is the cycle in which we schedule this + // instruction plus the number of cycles the operations reserves the + // resource. For bottom-up is it simply the instruction's cycle. + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + unsigned PIdx = PI->ProcResourceIdx; + if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { + if (isTop()) { + ReservedCycles[PIdx] = + std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); + } + else + ReservedCycles[PIdx] = NextCycle; + } + } + } + } + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); + } + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); + } + // If we stall for any reason, bump the cycle. + if (NextCycle > CurrCycle) { + bumpCycle(NextCycle); + } + else { + // After updating ZoneCritResIdx and ExpectedLatency, check if we're + // resource limited. If a stall occurred, bumpCycle does this. + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + } + // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle + // resets CurrMOps. Loop to handle instructions with more MOps than issue in + // one cycle. Since we commonly reach the max MOps here, opportunistically + // bump the cycle to avoid uselessly checking everything in the readyQ. 
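+ // e.g. with IssueWidth=4, scheduling a 3-uop instruction when CurrMOps==2
+ // pushes CurrMOps to 5; the loop below bumps the cycle once and bumpCycle
+ // leaves CurrMOps==1 for the new cycle.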
+ CurrMOps += IncMOps; + while (CurrMOps >= SchedModel->getIssueWidth()) { + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + bumpCycle(++NextCycle); + } + DEBUG(dumpScheduledState()); +} + +/// Release pending ready nodes in to the available queue. This makes them +/// visible to heuristics. +void SchedBoundary::releasePending() { + // If the available queue is empty, it is safe to reset MinReadyCycle. + if (Available.empty()) + MinReadyCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + for (unsigned i = 0, e = Pending.size(); i != e; ++i) { + SUnit *SU = *(Pending.begin()+i); + unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle; + + if (ReadyCycle < MinReadyCycle) + MinReadyCycle = ReadyCycle; + + if (!IsBuffered && ReadyCycle > CurrCycle) + continue; + + if (checkHazard(SU)) + continue; + + Available.push(SU); + Pending.remove(Pending.begin()+i); + --i; --e; + } + DEBUG(if (!Pending.empty()) Pending.dump()); + CheckPending = false; +} + +/// Remove SU from the ready set for this boundary. +void SchedBoundary::removeReady(SUnit *SU) { + if (Available.isInQueue(SU)) + Available.remove(Available.find(SU)); + else { + assert(Pending.isInQueue(SU) && "bad ready count"); + Pending.remove(Pending.find(SU)); + } +} + +/// If this queue only has one ready candidate, return it. As a side effect, +/// defer any nodes that now hit a hazard, and advance the cycle until at least +/// one node is ready. If multiple instructions are ready, return NULL. +SUnit *SchedBoundary::pickOnlyChoice() { + if (CheckPending) + releasePending(); + + if (CurrMOps > 0) { + // Defer any ready instrs that now have a hazard. + for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) { + if (checkHazard(*I)) { + Pending.push(*I); + I = Available.remove(I); + continue; + } + ++I; + } + } + for (unsigned i = 0; Available.empty(); ++i) { +// FIXME: Re-enable assert once PR20057 is resolved. +// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && +// "permanent hazard"); + (void)i; + bumpCycle(CurrCycle + 1); + releasePending(); + } + if (Available.size() == 1) + return *Available.begin(); + return nullptr; +} + +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. +void SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); + } + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " + << SchedModel->getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; +} +#endif + +//===----------------------------------------------------------------------===// +// GenericScheduler - Generic implementation of MachineSchedStrategy. 
+//===----------------------------------------------------------------------===// + +void GenericSchedulerBase::SchedCandidate:: +initResourceDelta(const ScheduleDAGMI *DAG, + const TargetSchedModel *SchedModel) { + if (!Policy.ReduceResIdx && !Policy.DemandResIdx) + return; + + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel->getWriteProcResBegin(SC), + PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { + if (PI->ProcResourceIdx == Policy.ReduceResIdx) + ResDelta.CritResources += PI->Cycles; + if (PI->ProcResourceIdx == Policy.DemandResIdx) + ResDelta.DemandedResources += PI->Cycles; + } +} + +/// Set the CandPolicy given a scheduling zone given the current resources and +/// latencies inside and outside the zone. +void GenericSchedulerBase::setPolicy(CandPolicy &Policy, + bool IsPostRA, + SchedBoundary &CurrZone, + SchedBoundary *OtherZone) { + // Apply preemptive heuristics based on the total latency and resources + // inside and outside this zone. Potential stalls should be considered before + // following this policy. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency. + unsigned RemLatency = CurrZone.getDependentLatency(); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Available.elements())); + RemLatency = std::max(RemLatency, + CurrZone.findMaxLatency(CurrZone.Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx = 0; + unsigned OtherCount = + OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0; + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + // Schedule aggressively for latency in PostRA mode. We don't check for + // acyclic latency during PostRA, and highly out-of-order processors will + // skip PostRA scheduling. + if (!OtherResLimited) { + if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << CurrZone.Available.getName() + << " RemainingLatency " << RemLatency << " + " + << CurrZone.getCurrCycle() << "c > CritPath " + << Rem.CriticalPath << "\n"); + } + } + // If the same resource is limiting inside and outside the zone, do nothing. 
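+ // Otherwise, if this zone is resource limited its critical resource is
+ // recorded as Policy.ReduceResIdx, and if the other side is resource limited
+ // its critical resource becomes Policy.DemandResIdx; tryCandidate() consumes
+ // these via the ResourceReduce/ResourceDemand heuristics.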
+ if (CurrZone.getZoneCritResIdx() == OtherCritIdx) + return; + + DEBUG( + if (CurrZone.isResourceLimited()) { + dbgs() << " " << CurrZone.Available.getName() << " ResourceLimited: " + << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) + << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " + << SchedModel->getResourceName(OtherCritIdx) << "\n"; + if (!CurrZone.isResourceLimited() && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx) + Policy.ReduceResIdx = CurrZone.getZoneCritResIdx(); + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +#ifndef NDEBUG +const char *GenericSchedulerBase::getReasonStr( + GenericSchedulerBase::CandReason Reason) { + switch (Reason) { + case NoCand: return "NOCAND "; + case PhysRegCopy: return "PREG-COPY"; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; + case Stall: return "STALL "; + case Cluster: return "CLUSTER "; + case Weak: return "WEAK "; + case RegMax: return "REG-MAX "; + case ResourceReduce: return "RES-REDUCE"; + case ResourceDemand: return "RES-DEMAND"; + case TopDepthReduce: return "TOP-DEPTH "; + case TopPathReduce: return "TOP-PATH "; + case BotHeightReduce:return "BOT-HEIGHT"; + case BotPathReduce: return "BOT-PATH "; + case NextDefUse: return "DEF-USE "; + case NodeOrder: return "ORDER "; + }; + llvm_unreachable("Unknown reason!"); +} + +void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; + unsigned ResIdx = 0; + unsigned Latency = 0; + switch (Cand.Reason) { + default: + break; + case RegExcess: + P = Cand.RPDelta.Excess; + break; + case RegCritical: + P = Cand.RPDelta.CriticalMax; + break; + case RegMax: + P = Cand.RPDelta.CurrentMax; + break; + case ResourceReduce: + ResIdx = Cand.Policy.ReduceResIdx; + break; + case ResourceDemand: + ResIdx = Cand.Policy.DemandResIdx; + break; + case TopDepthReduce: + Latency = Cand.SU->getDepth(); + break; + case TopPathReduce: + Latency = Cand.SU->getHeight(); + break; + case BotHeightReduce: + Latency = Cand.SU->getHeight(); + break; + case BotPathReduce: + Latency = Cand.SU->getDepth(); + break; + } + dbgs() << " Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); + if (P.isValid()) + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; + else + dbgs() << " "; + if (ResIdx) + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; + else + dbgs() << " "; + if (Latency) + dbgs() << " " << Latency << " cycles "; + else + dbgs() << " "; + dbgs() << '\n'; +} +#endif + +/// Return true if this heuristic determines order. 
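+/// Returning true with TryCand.Reason set means TryCand wins; returning true
+/// after only lowering Cand.Reason means the incumbent wins; returning false
+/// leaves the decision to the next heuristic in tryCandidate().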
+static bool tryLess(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { + if (TryVal < CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal > CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + Cand.setRepeat(Reason); + return false; +} + +static bool tryGreater(int TryVal, int CandVal, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason) { + if (TryVal > CandVal) { + TryCand.Reason = Reason; + return true; + } + if (TryVal < CandVal) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + Cand.setRepeat(Reason); + return false; +} + +static bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + SchedBoundary &Zone) { + if (Zone.isTop()) { + if (Cand.SU->getDepth() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::TopDepthReduce)) + return true; + } + if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::TopPathReduce)) + return true; + } + else { + if (Cand.SU->getHeight() > Zone.getScheduledLatency()) { + if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), + TryCand, Cand, GenericSchedulerBase::BotHeightReduce)) + return true; + } + if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), + TryCand, Cand, GenericSchedulerBase::BotPathReduce)) + return true; + } + return false; +} + +static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, + bool IsTop) { + DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") + << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); +} + +void GenericScheduler::initialize(ScheduleDAGMI *dag) { + assert(dag->hasVRegLiveness() && + "(PreRA)GenericScheduler needs vreg liveness"); + DAG = static_cast<ScheduleDAGMILive*>(dag); + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; + + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + Bot.init(DAG, SchedModel, &Rem); + + // Initialize resource counts. + + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or + // are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + if (!Top.HazardRec) { + Top.HazardRec = + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer( + Itin, DAG); + } +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const MachineFunction &MF = *Begin->getParent()->getParent(); + const TargetLowering *TLI = MF.getSubtarget().getTargetLowering(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. 
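+ // e.g. with an integer register class of 16 allocatable registers, pressure
+ // tracking is enabled once a region exceeds 8 schedulable instructions.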
+ RegionPolicy.ShouldTrackPressure = true; + for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) { + MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT; + if (TLI->isTypeLegal(LegalIntVT)) { + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TLI->getRegClassFor(LegalIntVT)); + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + } + } + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + MF.getSubtarget().overrideSchedPolicy(RegionPolicy, Begin, End, + NumRegionInstrs); + + // After subtarget overrides, apply command line options. + if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } +} + +void GenericScheduler::dumpPolicy() { + dbgs() << "GenericScheduler RegionPolicy: " + << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure + << " OnlyTopDown=" << RegionPolicy.OnlyTopDown + << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp + << "\n"; +} + +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bounds on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// +/// TODO: Check execution resources in addition to IssueCount. +void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path. + unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { + Rem.CriticalPath = DAG->ExitSU.getDepth(); + + // Some roots may not feed into ExitSU. Check all of them in case. 
+ for (std::vector<SUnit*>::const_iterator + I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { + if ((*I)->getDepth() > Rem.CriticalPath) + Rem.CriticalPath = (*I)->getDepth(); + } + DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n'); + if (DumpCriticalPathLength) { + errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n"; + } + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } +} + +static bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericSchedulerBase::SchedCandidate &TryCand, + GenericSchedulerBase::SchedCandidate &Cand, + GenericSchedulerBase::CandReason Reason, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) { + unsigned TryPSet = TryP.getPSetOrMax(); + unsigned CandPSet = CandP.getPSetOrMax(); + // If both candidates affect the same set, go with the smallest increase. + if (TryPSet == CandPSet) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + // Invalid candidates have UnitInc==0. + if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand, + Reason)) { + return true; + } + + int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) : + std::numeric_limits<int>::max(); + + int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) : + std::numeric_limits<int>::max(); + + // If the candidates are decreasing pressure, reverse priority. + if (TryP.getUnitInc() < 0) + std::swap(TryRank, CandRank); + return tryGreater(TryRank, CandRank, TryCand, Cand, Reason); +} + +static unsigned getWeakLeft(const SUnit *SU, bool isTop) { + return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; +} + +/// Minimize physical register live ranges. Regalloc wants them adjacent to +/// their physreg def/use. +/// +/// FIXME: This is an unnecessary check on the critical path. Most are root/leaf +/// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled +/// with the operation that produces or consumes the physreg. We'll do this when +/// regalloc has support for parallel copies. +static int biasPhysRegCopy(const SUnit *SU, bool isTop) { + const MachineInstr *MI = SU->getInstr(); + if (!MI->isCopy()) + return 0; + + unsigned ScheduledOper = isTop ? 1 : 0; + unsigned UnscheduledOper = isTop ? 0 : 1; + // If we have already scheduled the physreg produce/consumer, immediately + // schedule the copy. + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(ScheduledOper).getReg())) + return 1; + // If the physreg is at the boundary, defer it. Otherwise schedule it + // immediately to free the dependent. We can hoist the copy later. + bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft; + if (TargetRegisterInfo::isPhysicalRegister( + MI->getOperand(UnscheduledOper).getReg())) + return AtBoundary ? -1 : 1; + return 0; +} + +/// Apply a set of heursitics to a new candidate. Heuristics are currently +/// hierarchical. This may be more efficient than a graduated cost model because +/// we don't need to evaluate all aspects of the model for each node in the +/// queue. But it's really done to make the heuristics easier to debug and +/// statistically analyze. +/// +/// \param Cand provides the policy and current best candidate. +/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized. 
+/// \param Zone describes the scheduled zone that we are extending. +/// \param RPTracker describes reg pressure within the scheduled zone. +/// \param TempTracker is a scratch pressure tracker to reuse in queries. +void GenericScheduler::tryCandidate(SchedCandidate &Cand, + SchedCandidate &TryCand, + SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + RegPressureTracker &TempTracker) { + + if (DAG->isTrackingPressure()) { + // Always initialize TryCand's RPDelta. + if (Zone.isTop()) { + TempTracker.getMaxDownwardPressureDelta( + TryCand.SU->getInstr(), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + if (VerifyScheduling) { + TempTracker.getMaxUpwardPressureDelta( + TryCand.SU->getInstr(), + &DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + else { + RPTracker.getUpwardPressureDelta( + TryCand.SU->getInstr(), + DAG->getPressureDiff(TryCand.SU), + TryCand.RPDelta, + DAG->getRegionCriticalPSets(), + DAG->getRegPressure().MaxSetPressure); + } + } + } + DEBUG(if (TryCand.RPDelta.Excess.isValid()) + dbgs() << " Try SU(" << TryCand.SU->NodeNum << ") " + << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet()) + << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n"); + + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + + if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()), + biasPhysRegCopy(Cand.SU, Zone.isTop()), + TryCand, Cand, PhysRegCopy)) + return; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, + Cand.RPDelta.Excess, + TryCand, Cand, RegExcess, TRI, + DAG->MF)) + return; + + // Avoid increasing the max critical pressure in the scheduled region. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, + Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, + DAG->MF)) + return; + + // For loops that are acyclic path limited, aggressively schedule for latency. + // This can result in very long dependence chains scheduled in sequence, so + // once every cycle (when CurrMOps == 0), switch to normal heuristics. + if (Rem.IsAcyclicLatencyLimited && !Zone.getCurrMOps() + && tryLatency(TryCand, Cand, Zone)) + return; + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone.getLatencyStallCycles(TryCand.SU), + Zone.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *NextClusterSU = + Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, + TryCand, Cand, Cluster)) + return; + + // Weak edges are for clustering and other constraints. + if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), + getWeakLeft(Cand.SU, Zone.isTop()), + TryCand, Cand, Weak)) { + return; + } + // Avoid increasing the max pressure of the entire region. 
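+  // This only runs when the RegExcess and RegCritical comparisons above were
+  // ties, so the overall max pressure acts as a lower-priority tie-breaker.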
+ if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, + Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax, TRI, + DAG->MF)) + return; + + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + // For acyclic path limited loops, latency was already checked above. + if (!RegionPolicy.DisableLatencyHeuristic && Cand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, Zone)) { + return; + } + + // Prefer immediate defs/users of the last scheduled instruction. This is a + // local pressure avoidance strategy that also makes the machine code + // readable. + if (tryGreater(Zone.isNextSU(TryCand.SU), Zone.isNextSU(Cand.SU), + TryCand, Cand, NextDefUse)) + return; + + // Fall through to original instruction order. + if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) + || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } +} + +/// Pick the best candidate from the queue. +/// +/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during +/// DAG building. To adjust for the current scheduling location we need to +/// maintain the number of vreg uses remaining to be top-scheduled. +void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, + const RegPressureTracker &RPTracker, + SchedCandidate &Cand) { + ReadyQueue &Q = Zone.Available; + + DEBUG(Q.dump()); + + // getMaxPressureDelta temporarily modifies the tracker. + RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker); + if (TryCand.Reason != NoCand) { + // Initialize resource delta if needed in case future heuristics query it. + if (TryCand.ResDelta == SchedResourceDelta()) + TryCand.initResourceDelta(DAG, SchedModel); + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +/// Pick the best candidate node from either the top or bottom queue. +SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { + // Schedule as far as possible in the direction of no choice. This is most + // efficient, but also provides the best heuristics for CriticalPSets. + if (SUnit *SU = Bot.pickOnlyChoice()) { + IsTopNode = false; + DEBUG(dbgs() << "Pick Bot ONLY1\n"); + return SU; + } + if (SUnit *SU = Top.pickOnlyChoice()) { + IsTopNode = true; + DEBUG(dbgs() << "Pick Top ONLY1\n"); + return SU; + } + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + SchedCandidate TopCand(NoPolicy); + // Set the bottom-up policy based on the state of the current bottom zone and + // the instructions outside the zone, including the top zone. + setPolicy(BotCand.Policy, /*IsPostRA=*/false, Bot, &Top); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/false, Top, &Bot); + + // Prefer bottom scheduling when heuristics are silent. 
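+  // Evaluate the bottom zone first; the top queue is only examined below when
+  // the bottom pick is not driven by a fresh RegExcess or RegCritical reason.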
+ pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + + // If either Q has a single candidate that provides the least increase in + // Excess pressure, we can immediately schedule from that Q. + // + // RegionCriticalPSets summarizes the pressure within the scheduled region and + // affects picking from either Q. If scheduling in one direction must + // increase pressure for one of the excess PSets, then schedule in that + // direction first to provide more freedom in the other direction. + if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) + || (BotCand.Reason == RegCritical + && !BotCand.isRepeat(RegCritical))) + { + IsTopNode = false; + tracePick(BotCand, IsTopNode); + return BotCand.SU; + } + // Check if the top Q has a better candidate. + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + + // Choose the queue with the most important (lowest enum) reason. + if (TopCand.Reason < BotCand.Reason) { + IsTopNode = true; + tracePick(TopCand, IsTopNode); + return TopCand.SU; + } + // Otherwise prefer the bottom candidate, in node order if all else failed. + IsTopNode = false; + tracePick(BotCand, IsTopNode); + return BotCand.SU; +} + +/// Pick the best node to balance the schedule. Implements MachineSchedStrategy. +SUnit *GenericScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && + Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + do { + if (RegionPolicy.OnlyTopDown) { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); + SU = TopCand.SU; + } + IsTopNode = true; + } + else if (RegionPolicy.OnlyBottomUp) { + SU = Bot.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate BotCand(NoPolicy); + pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand, false); + SU = BotCand.SU; + } + IsTopNode = false; + } + else { + SU = pickNodeBidirectional(IsTopNode); + } + } while (SU->isScheduled); + + if (SU->isTopReady()) + Top.removeReady(SU); + if (SU->isBottomReady()) + Bot.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} + +void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { + + MachineBasicBlock::iterator InsertPos = SU->getInstr(); + if (!isTop) + ++InsertPos; + SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs; + + // Find already scheduled copies with a single physreg dependence and move + // them just above the scheduled instruction. + for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end(); + I != E; ++I) { + if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg())) + continue; + SUnit *DepSU = I->getSUnit(); + if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1) + continue; + MachineInstr *Copy = DepSU->getInstr(); + if (!Copy->isCopy()) + continue; + DEBUG(dbgs() << " Rescheduling physreg copy "; + I->getSUnit()->dump(DAG)); + DAG->moveInstruction(Copy, InsertPos); + } +} + +/// Update the scheduler's state after scheduling a node. 
This is the same node
+/// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
+/// update its state based on the current cycle before MachineSchedStrategy
+/// does.
+///
+/// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
+/// them here. See comments in biasPhysRegCopy.
+void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
+  if (IsTopNode) {
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+    Top.bumpNode(SU);
+    if (SU->hasPhysRegUses)
+      reschedulePhysRegCopies(SU, true);
+  }
+  else {
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
+    Bot.bumpNode(SU);
+    if (SU->hasPhysRegDefs)
+      reschedulePhysRegCopies(SU, false);
+  }
+}
+
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
+  // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
+  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
+    DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
+  if (EnableMacroFusion)
+    DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+  return DAG;
+}
+
+static MachineSchedRegistry
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+                     createGenericSchedLive);
+
+//===----------------------------------------------------------------------===//
+// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
+//===----------------------------------------------------------------------===//
+
+void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
+  DAG = Dag;
+  SchedModel = DAG->getSchedModel();
+  TRI = DAG->TRI;
+
+  Rem.init(DAG, SchedModel);
+  Top.init(DAG, SchedModel, &Rem);
+  BotRoots.clear();
+
+  // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
+  // or are disabled, then these HazardRecs will be disabled.
+  const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
+  if (!Top.HazardRec) {
+    Top.HazardRec =
+        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+            Itin, DAG);
+  }
+}
+
+
+void PostGenericScheduler::registerRoots() {
+  Rem.CriticalPath = DAG->ExitSU.getDepth();
+
+  // Some roots may not feed into ExitSU. Check all of them in case.
+  for (SmallVectorImpl<SUnit*>::const_iterator
+         I = BotRoots.begin(), E = BotRoots.end(); I != E; ++I) {
+    if ((*I)->getDepth() > Rem.CriticalPath)
+      Rem.CriticalPath = (*I)->getDepth();
+  }
+  DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
+  if (DumpCriticalPathLength) {
+    errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
+  }
+}
+
+/// Apply a set of heuristics to a new candidate for PostRA scheduling.
+///
+/// \param Cand provides the policy and current best candidate.
+/// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
+void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
+                                        SchedCandidate &TryCand) {
+
+  // Initialize the candidate if needed.
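+  // The first candidate examined wins by default with reason NodeOrder and
+  // becomes the baseline that later candidates must beat.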
+ if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Top.getLatencyStallCycles(TryCand.SU), + Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + + // Avoid critical resource consumption and balance the schedule. + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, + TryCand, Cand, ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { + return; + } + + // Fall through to original instruction order. + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; +} + +void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) { + ReadyQueue &Q = Top.Available; + + DEBUG(Q.dump()); + + for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) { + SchedCandidate TryCand(Cand.Policy); + TryCand.SU = *I; + TryCand.initResourceDelta(DAG, SchedModel); + tryCandidate(Cand, TryCand); + if (TryCand.Reason != NoCand) { + Cand.setBest(TryCand); + DEBUG(traceCandidate(Cand)); + } + } +} + +/// Pick the next node to schedule. +SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) { + if (DAG->top() == DAG->bottom()) { + assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage"); + return nullptr; + } + SUnit *SU; + do { + SU = Top.pickOnlyChoice(); + if (!SU) { + CandPolicy NoPolicy; + SchedCandidate TopCand(NoPolicy); + // Set the top-down policy based on the state of the current top zone and + // the instructions outside the zone, including the bottom zone. + setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr); + pickNodeFromQueue(TopCand); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); + SU = TopCand.SU; + } + } while (SU->isScheduled); + + IsTopNode = true; + Top.removeReady(SU); + + DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); + return SU; +} + +/// Called after ScheduleDAGMI has scheduled an instruction and updated +/// scheduled/remaining flags in the DAG nodes. +void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle()); + Top.bumpNode(SU); +} + +/// Create a generic scheduler with no vreg liveness or DAG mutation passes. +static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) { + return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true); +} + +//===----------------------------------------------------------------------===// +// ILP Scheduler. Currently for experimental analysis of heuristics. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Order nodes by the ILP metric. +struct ILPOrder { + const SchedDFSResult *DFSResult; + const BitVector *ScheduledTrees; + bool MaximizeILP; + + ILPOrder(bool MaxILP) + : DFSResult(nullptr), ScheduledTrees(nullptr), MaximizeILP(MaxILP) {} + + /// \brief Apply a less-than relation on node priority. + /// + /// (Return true if A comes after B in the Q.) 
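+  ///
+  /// Roughly: nodes in already-scheduled subtrees are preferred, then nodes
+  /// in subtrees with deeper connections, then the ILP metric itself in the
+  /// direction selected by MaximizeILP.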
+  bool operator()(const SUnit *A, const SUnit *B) const {
+    unsigned SchedTreeA = DFSResult->getSubtreeID(A);
+    unsigned SchedTreeB = DFSResult->getSubtreeID(B);
+    if (SchedTreeA != SchedTreeB) {
+      // Unscheduled trees have lower priority.
+      if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
+        return ScheduledTrees->test(SchedTreeB);
+
+      // Trees with shallower connections have lower priority.
+      if (DFSResult->getSubtreeLevel(SchedTreeA)
+          != DFSResult->getSubtreeLevel(SchedTreeB)) {
+        return DFSResult->getSubtreeLevel(SchedTreeA)
+          < DFSResult->getSubtreeLevel(SchedTreeB);
+      }
+    }
+    if (MaximizeILP)
+      return DFSResult->getILP(A) < DFSResult->getILP(B);
+    else
+      return DFSResult->getILP(A) > DFSResult->getILP(B);
+  }
+};
+
+/// \brief Schedule based on the ILP metric.
+class ILPScheduler : public MachineSchedStrategy {
+  ScheduleDAGMILive *DAG;
+  ILPOrder Cmp;
+
+  std::vector<SUnit*> ReadyQ;
+public:
+  ILPScheduler(bool MaximizeILP): DAG(nullptr), Cmp(MaximizeILP) {}
+
+  void initialize(ScheduleDAGMI *dag) override {
+    assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
+    DAG = static_cast<ScheduleDAGMILive*>(dag);
+    DAG->computeDFSResult();
+    Cmp.DFSResult = DAG->getDFSResult();
+    Cmp.ScheduledTrees = &DAG->getScheduledTrees();
+    ReadyQ.clear();
+  }
+
+  void registerRoots() override {
+    // Restore the heap in ReadyQ with the updated DFS results.
+    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+  }
+
+  /// Implement MachineSchedStrategy interface.
+  /// -----------------------------------------
+
+  /// Callback to select the highest priority node from the ready Q.
+  SUnit *pickNode(bool &IsTopNode) override {
+    if (ReadyQ.empty()) return nullptr;
+    std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+    SUnit *SU = ReadyQ.back();
+    ReadyQ.pop_back();
+    IsTopNode = false;
+    DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
+          << " ILP: " << DAG->getDFSResult()->getILP(SU)
+          << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
+          << DAG->getDFSResult()->getSubtreeLevel(
+            DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
+          << "Scheduling " << *SU->getInstr());
+    return SU;
+  }
+
+  /// \brief Scheduler callback to notify that a new subtree is scheduled.
+  void scheduleTree(unsigned SubtreeID) override {
+    std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
+  }
+
+  /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
+  /// DFSResults, and resort the priority Q.
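+  /// Note that the ready heap is only re-sorted in registerRoots() and
+  /// scheduleTree(); this callback just asserts the bottom-up invariant.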
+ void schedNode(SUnit *SU, bool IsTopNode) override { + assert(!IsTopNode && "SchedDFSResult needs bottom-up"); + } + + void releaseTopNode(SUnit *) override { /*only called for top roots*/ } + + void releaseBottomNode(SUnit *SU) override { + ReadyQ.push_back(SU); + std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } +}; +} // namespace + +static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) { + return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(true)); +} +static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) { + return new ScheduleDAGMILive(C, make_unique<ILPScheduler>(false)); +} +static MachineSchedRegistry ILPMaxRegistry( + "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler); +static MachineSchedRegistry ILPMinRegistry( + "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler); + +//===----------------------------------------------------------------------===// +// Machine Instruction Shuffler for Correctness Testing +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace { +/// Apply a less-than relation on the node order, which corresponds to the +/// instruction order prior to scheduling. IsReverse implements greater-than. +template<bool IsReverse> +struct SUnitOrder { + bool operator()(SUnit *A, SUnit *B) const { + if (IsReverse) + return A->NodeNum > B->NodeNum; + else + return A->NodeNum < B->NodeNum; + } +}; + +/// Reorder instructions as much as possible. +class InstructionShuffler : public MachineSchedStrategy { + bool IsAlternating; + bool IsTopDown; + + // Using a less-than relation (SUnitOrder<false>) for the TopQ priority + // gives nodes with a higher number higher priority causing the latest + // instructions to be scheduled first. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> > + TopQ; + // When scheduling bottom-up, use greater-than as the queue priority. + PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> > + BottomQ; +public: + InstructionShuffler(bool alternate, bool topdown) + : IsAlternating(alternate), IsTopDown(topdown) {} + + void initialize(ScheduleDAGMI*) override { + TopQ.clear(); + BottomQ.clear(); + } + + /// Implement MachineSchedStrategy interface. 
+ /// ----------------------------------------- + + SUnit *pickNode(bool &IsTopNode) override { + SUnit *SU; + if (IsTopDown) { + do { + if (TopQ.empty()) return nullptr; + SU = TopQ.top(); + TopQ.pop(); + } while (SU->isScheduled); + IsTopNode = true; + } + else { + do { + if (BottomQ.empty()) return nullptr; + SU = BottomQ.top(); + BottomQ.pop(); + } while (SU->isScheduled); + IsTopNode = false; + } + if (IsAlternating) + IsTopDown = !IsTopDown; + return SU; + } + + void schedNode(SUnit *SU, bool IsTopNode) override {} + + void releaseTopNode(SUnit *SU) override { + TopQ.push(SU); + } + void releaseBottomNode(SUnit *SU) override { + BottomQ.push(SU); + } +}; +} // namespace + +static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) { + bool Alternate = !ForceTopDown && !ForceBottomUp; + bool TopDown = !ForceBottomUp; + assert((TopDown || !ForceTopDown) && + "-misched-topdown incompatible with -misched-bottomup"); + return new ScheduleDAGMILive(C, make_unique<InstructionShuffler>(Alternate, TopDown)); +} +static MachineSchedRegistry ShufflerRegistry( + "shuffle", "Shuffle machine instructions alternating directions", + createInstructionShuffler); +#endif // !NDEBUG + +//===----------------------------------------------------------------------===// +// GraphWriter support for ScheduleDAGMILive. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace llvm { + +template<> struct GraphTraits< + ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {}; + +template<> +struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + if (ViewMISchedCutoff == 0) + return false; + return (Node->Preds.size() > ViewMISchedCutoff + || Node->Succs.size() > ViewMISchedCutoff); + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { + std::string Str; + raw_string_ostream SS(Str); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? + static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; + SS << "SU:" << SU->NodeNum; + if (DFS) + SS << " I:" << DFS->getNumInstrs(SU); + return SS.str(); + } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); + } + + static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) { + std::string Str("shape=Mrecord"); + const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G); + const SchedDFSResult *DFS = DAG->hasVRegLiveness() ? 
+ static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr; + if (DFS) { + Str += ",style=filled,fillcolor=\"#"; + Str += DOT::getColorString(DFS->getSubtreeID(N)); + Str += '"'; + } + return Str; + } +}; +} // namespace llvm +#endif // NDEBUG + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAGMI::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp new file mode 100644 index 0000000..5e6d619 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp @@ -0,0 +1,813 @@ +//===-- MachineSink.cpp - Sinking for machine instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass moves instructions into successor blocks when possible, so that +// they aren't executed on paths where their results aren't needed. +// +// This pass is not intended to be a replacement or a complete alternative +// for an LLVM-IR-level sinking pass. It is only designed to sink simple +// constructs that are not exposed before lowering and instruction selection. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SparseBitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "machine-sink" + +static cl::opt<bool> +SplitEdges("machine-sink-split", + cl::desc("Split critical edges during machine sinking"), + cl::init(true), cl::Hidden); + +static cl::opt<bool> +UseBlockFreqInfo("machine-sink-bfi", + cl::desc("Use block frequency info to find successors to sink"), + cl::init(true), cl::Hidden); + + +STATISTIC(NumSunk, "Number of machine instructions sunk"); +STATISTIC(NumSplit, "Number of critical edges split"); +STATISTIC(NumCoalesces, "Number of copies coalesced"); + +namespace { + class MachineSinking : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; // Machine register information + MachineDominatorTree *DT; // Machine dominator tree + MachinePostDominatorTree *PDT; // Machine post dominator tree + MachineLoopInfo *LI; + const MachineBlockFrequencyInfo *MBFI; + AliasAnalysis *AA; + + // Remember which edges have been considered for breaking. 
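+    // The set is reset at the start of each outer iteration of
+    // runOnMachineFunction(), so it only spans a single pass over the
+    // function.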
+    SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+    CEBCandidates;
+    // Remember which edges we are about to split.
+    // This is different from CEBCandidates: the edges recorded here
+    // will actually be split.
+    SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
+
+    SparseBitVector<> RegsToClearKillFlags;
+
+    typedef std::map<MachineBasicBlock *, SmallVector<MachineBasicBlock *, 4>>
+        AllSuccsCache;
+
+  public:
+    static char ID; // Pass identification
+    MachineSinking() : MachineFunctionPass(ID) {
+      initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+    }
+
+    bool runOnMachineFunction(MachineFunction &MF) override;
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+      AU.addRequired<AAResultsWrapperPass>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addRequired<MachinePostDominatorTree>();
+      AU.addRequired<MachineLoopInfo>();
+      AU.addPreserved<MachineDominatorTree>();
+      AU.addPreserved<MachinePostDominatorTree>();
+      AU.addPreserved<MachineLoopInfo>();
+      if (UseBlockFreqInfo)
+        AU.addRequired<MachineBlockFrequencyInfo>();
+    }
+
+    void releaseMemory() override {
+      CEBCandidates.clear();
+    }
+
+  private:
+    bool ProcessBlock(MachineBasicBlock &MBB);
+    bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+                                     MachineBasicBlock *From,
+                                     MachineBasicBlock *To);
+    /// \brief Postpone the splitting of the given critical
+    /// edge (\p From, \p To).
+    ///
+    /// We do not split the edges on the fly. Indeed, this invalidates
+    /// the dominance information and thus triggers a lot of updates
+    /// of that information underneath.
+    /// Instead, we postpone all the splits until after each iteration of
+    /// the main loop. That way, the information is at least valid
+    /// for the lifetime of an iteration.
+    ///
+    /// \return True if the edge is marked as toSplit, false otherwise.
+    /// False can be returned if, for instance, this is not profitable.
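+    /// The deferred edges are recorded in ToSplit and actually split by
+    /// runOnMachineFunction() once the current pass over the blocks finishes.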
+ bool PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To, + bool BreakPHIEdge); + bool SinkInstruction(MachineInstr *MI, bool &SawStore, + AllSuccsCache &AllSuccessors); + bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, bool &LocalUse) const; + MachineBasicBlock *FindSuccToSinkTo(MachineInstr *MI, MachineBasicBlock *MBB, + bool &BreakPHIEdge, AllSuccsCache &AllSuccessors); + bool isProfitableToSinkTo(unsigned Reg, MachineInstr *MI, + MachineBasicBlock *MBB, + MachineBasicBlock *SuccToSinkTo, + AllSuccsCache &AllSuccessors); + + bool PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB); + + SmallVector<MachineBasicBlock *, 4> & + GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB, + AllSuccsCache &AllSuccessors) const; + }; +} // end anonymous namespace + +char MachineSinking::ID = 0; +char &llvm::MachineSinkingID = MachineSinking::ID; +INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink", + "Machine code sinking", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(MachineSinking, "machine-sink", + "Machine code sinking", false, false) + +bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI, + MachineBasicBlock *MBB) { + if (!MI->isCopy()) + return false; + + unsigned SrcReg = MI->getOperand(1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || + !TargetRegisterInfo::isVirtualRegister(DstReg) || + !MRI->hasOneNonDBGUse(SrcReg)) + return false; + + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *DRC = MRI->getRegClass(DstReg); + if (SRC != DRC) + return false; + + MachineInstr *DefMI = MRI->getVRegDef(SrcReg); + if (DefMI->isCopyLike()) + return false; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MRI->replaceRegWith(DstReg, SrcReg); + MI->eraseFromParent(); + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + MRI->clearKillFlags(SrcReg); + + ++NumCoalesces; + return true; +} + +/// AllUsesDominatedByBlock - Return true if all uses of the specified register +/// occur in blocks dominated by the specified block. If any use is in the +/// definition block, then return false since it is never legal to move def +/// after uses. +bool +MachineSinking::AllUsesDominatedByBlock(unsigned Reg, + MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB, + bool &BreakPHIEdge, + bool &LocalUse) const { + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Only makes sense for vregs"); + + // Ignore debug uses because debug info doesn't affect the code. + if (MRI->use_nodbg_empty(Reg)) + return true; + + // BreakPHIEdge is true if all the uses are in the successor MBB being sunken + // into and they are all PHI nodes. In this case, machine-sink must break + // the critical edge first. e.g. + // + // BB#1: derived from LLVM BB %bb4.preheader + // Predecessors according to CFG: BB#0 + // ... + // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead> + // ... 
+ // JE_4 <BB#37>, %EFLAGS<imp-use> + // Successors according to CFG: BB#37 BB#2 + // + // BB#2: derived from LLVM BB %bb.nph + // Predecessors according to CFG: BB#0 BB#1 + // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1> + BreakPHIEdge = true; + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (!(UseBlock == MBB && UseInst->isPHI() && + UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) { + BreakPHIEdge = false; + break; + } + } + if (BreakPHIEdge) + return true; + + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + // Determine the block of the use. + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = &MO - &UseInst->getOperand(0); + MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseInst->isPHI()) { + // PHI nodes use the operand in the predecessor block, not the block with + // the PHI. + UseBlock = UseInst->getOperand(OpNo+1).getMBB(); + } else if (UseBlock == DefMBB) { + LocalUse = true; + return false; + } + + // Check that it dominates. + if (!DT->dominates(MBB, UseBlock)) + return false; + } + + return true; +} + +bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + DEBUG(dbgs() << "******** Machine Sinking ********\n"); + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + DT = &getAnalysis<MachineDominatorTree>(); + PDT = &getAnalysis<MachinePostDominatorTree>(); + LI = &getAnalysis<MachineLoopInfo>(); + MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr; + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + + bool EverMadeChange = false; + + while (1) { + bool MadeChange = false; + + // Process all basic blocks. + CEBCandidates.clear(); + ToSplit.clear(); + for (auto &MBB: MF) + MadeChange |= ProcessBlock(MBB); + + // If we have anything we marked as toSplit, split it now. + for (auto &Pair : ToSplit) { + auto NewSucc = Pair.first->SplitCriticalEdge(Pair.second, this); + if (NewSucc != nullptr) { + DEBUG(dbgs() << " *** Splitting critical edge:" + " BB#" << Pair.first->getNumber() + << " -- BB#" << NewSucc->getNumber() + << " -- BB#" << Pair.second->getNumber() << '\n'); + MadeChange = true; + ++NumSplit; + } else + DEBUG(dbgs() << " *** Not legal to break critical edge\n"); + } + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; + } + + // Now clear any kill flags for recorded registers. + for (auto I : RegsToClearKillFlags) + MRI->clearKillFlags(I); + RegsToClearKillFlags.clear(); + + return EverMadeChange; +} + +bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { + // Can't sink anything out of a block that has less than two successors. + if (MBB.succ_size() <= 1 || MBB.empty()) return false; + + // Don't bother sinking code out of unreachable blocks. In addition to being + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. + if (!DT->isReachableFromEntry(&MBB)) return false; + + bool MadeChange = false; + + // Cache all successors, sorted by frequency info and loop depth. + AllSuccsCache AllSuccessors; + + // Walk the basic block bottom-up. Remember if we saw a store. 
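+  // SawStore is threaded through MachineInstr::isSafeToMove() inside
+  // SinkInstruction() so that a load is not sunk past a possibly aliasing
+  // store that appears below it in the block.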
+ MachineBasicBlock::iterator I = MBB.end(); + --I; + bool ProcessedBegin, SawStore = false; + do { + MachineInstr *MI = I; // The instruction to sink. + + // Predecrement I (if it's not begin) so that it isn't invalidated by + // sinking. + ProcessedBegin = I == MBB.begin(); + if (!ProcessedBegin) + --I; + + if (MI->isDebugValue()) + continue; + + bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); + if (Joined) { + MadeChange = true; + continue; + } + + if (SinkInstruction(MI, SawStore, AllSuccessors)) + ++NumSunk, MadeChange = true; + + // If we just processed the first instruction in the block, we're done. + } while (!ProcessedBegin); + + return MadeChange; +} + +bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI, + MachineBasicBlock *From, + MachineBasicBlock *To) { + // FIXME: Need much better heuristics. + + // If the pass has already considered breaking this edge (during this pass + // through the function), then let's go ahead and break it. This means + // sinking multiple "cheap" instructions into the same block. + if (!CEBCandidates.insert(std::make_pair(From, To)).second) + return true; + + if (!MI->isCopy() && !TII->isAsCheapAsAMove(MI)) + return true; + + // MI is cheap, we probably don't want to break the critical edge for it. + // However, if this would allow some definitions of its source operands + // to be sunk then it's probably worth it. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // We don't move live definitions of physical registers, + // so sinking their uses won't enable any opportunities. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + // If this instruction is the only user of a virtual register, + // check if breaking the edge will enable sinking + // both this instruction and the defining instruction. + if (MRI->hasOneNonDBGUse(Reg)) { + // If the definition resides in same MBB, + // claim it's likely we can sink these together. + // If definition resides elsewhere, we aren't + // blocking it from being sunk so don't break the edge. + MachineInstr *DefMI = MRI->getVRegDef(Reg); + if (DefMI->getParent() == MI->getParent()) + return true; + } + } + + return false; +} + +bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr *MI, + MachineBasicBlock *FromBB, + MachineBasicBlock *ToBB, + bool BreakPHIEdge) { + if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB)) + return false; + + // Avoid breaking back edge. From == To means backedge for single BB loop. + if (!SplitEdges || FromBB == ToBB) + return false; + + // Check for backedges of more "complex" loops. + if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) && + LI->isLoopHeader(ToBB)) + return false; + + // It's not always legal to break critical edges and sink the computation + // to the edge. + // + // BB#1: + // v1024 + // Beq BB#3 + // <fallthrough> + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted: + // + // BB#1: + // ... + // Bne BB#2 + // BB#4: + // v1024 = + // B BB#3 + // BB#2: + // ... no uses of v1024 + // <fallthrough> + // BB#3: + // ... + // = v1024 + // + // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3 + // flow. We need to ensure the new basic block where the computation is + // sunk to dominates all the uses. 
+  // It's only legal to break a critical edge and sink the computation to the
+  // new block if all the predecessors of "To", except for "From", are
+  // not dominated by "From". Given the SSA property, this means these
+  // predecessors are dominated by "To".
+  //
+  // There is no need to do this check if all the uses are PHI nodes. PHI
+  // sources are only defined on the specific predecessor edges.
+  if (!BreakPHIEdge) {
+    for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
+           E = ToBB->pred_end(); PI != E; ++PI) {
+      if (*PI == FromBB)
+        continue;
+      if (!DT->dominates(ToBB, *PI))
+        return false;
+    }
+  }
+
+  ToSplit.insert(std::make_pair(FromBB, ToBB));
+
+  return true;
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+  return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
+}
+
+/// collectDebugValues - Scan instructions following MI and collect any
+/// matching DBG_VALUEs.
+static void collectDebugValues(MachineInstr *MI,
+                               SmallVectorImpl<MachineInstr *> &DbgValues) {
+  DbgValues.clear();
+  if (!MI->getOperand(0).isReg())
+    return;
+
+  MachineBasicBlock::iterator DI = MI; ++DI;
+  for (MachineBasicBlock::iterator DE = MI->getParent()->end();
+       DI != DE; ++DI) {
+    if (!DI->isDebugValue())
+      return;
+    if (DI->getOperand(0).isReg() &&
+        DI->getOperand(0).getReg() == MI->getOperand(0).getReg())
+      DbgValues.push_back(DI);
+  }
+}
+
+/// isProfitableToSinkTo - Return true if it is profitable to sink MI.
+bool MachineSinking::isProfitableToSinkTo(unsigned Reg, MachineInstr *MI,
+                                          MachineBasicBlock *MBB,
+                                          MachineBasicBlock *SuccToSinkTo,
+                                          AllSuccsCache &AllSuccessors) {
+  assert (MI && "Invalid MachineInstr!");
+  assert (SuccToSinkTo && "Invalid SinkTo Candidate BB");
+
+  if (MBB == SuccToSinkTo)
+    return false;
+
+  // It is profitable if SuccToSinkTo does not post-dominate the current block.
+  if (!PDT->dominates(SuccToSinkTo, MBB))
+    return true;
+
+  // It is profitable to sink an instruction from a deeper loop to a shallower
+  // loop, even if the latter post-dominates the former (PR21115).
+  if (LI->getLoopDepth(MBB) > LI->getLoopDepth(SuccToSinkTo))
+    return true;
+
+  // Check if the only use in the post-dominated block is a PHI instruction.
+  bool NonPHIUse = false;
+  for (MachineInstr &UseInst : MRI->use_nodbg_instructions(Reg)) {
+    MachineBasicBlock *UseBlock = UseInst.getParent();
+    if (UseBlock == SuccToSinkTo && !UseInst.isPHI())
+      NonPHIUse = true;
+  }
+  if (!NonPHIUse)
+    return true;
+
+  // If SuccToSinkTo post-dominates MBB, it may still be profitable if MI
+  // can be profitably sunk further into another block in the next round.
+  bool BreakPHIEdge = false;
+  // FIXME - If finding a successor is compile-time expensive, cache results.
+  if (MachineBasicBlock *MBB2 =
+          FindSuccToSinkTo(MI, SuccToSinkTo, BreakPHIEdge, AllSuccessors))
+    return isProfitableToSinkTo(Reg, MI, SuccToSinkTo, MBB2, AllSuccessors);
+
+  // If SuccToSinkTo is the final destination and it post-dominates the current
+  // block, then it is not profitable to sink MI into SuccToSinkTo.
+  return false;
+}
+
+/// Get the sorted sequence of successors for this MachineBasicBlock, possibly
+/// computing it if it was not already cached.
+SmallVector<MachineBasicBlock *, 4> &
+MachineSinking::GetAllSortedSuccessors(MachineInstr *MI, MachineBasicBlock *MBB,
+                                       AllSuccsCache &AllSuccessors) const {
+
+  // Do we have the sorted successors in the cache?
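+  // The cache lives for a single ProcessBlock() invocation, so the entries
+  // stay valid while instructions are sunk out of one block.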
+ auto Succs = AllSuccessors.find(MBB); + if (Succs != AllSuccessors.end()) + return Succs->second; + + SmallVector<MachineBasicBlock *, 4> AllSuccs(MBB->succ_begin(), + MBB->succ_end()); + + // Handle cases where sinking can happen but where the sink point isn't a + // successor. For example: + // + // x = computation + // if () {} else {} + // use x + // + const std::vector<MachineDomTreeNode *> &Children = + DT->getNode(MBB)->getChildren(); + for (const auto &DTChild : Children) + // DomTree children of MBB that have MBB as immediate dominator are added. + if (DTChild->getIDom()->getBlock() == MI->getParent() && + // Skip MBBs already added to the AllSuccs vector above. + !MBB->isSuccessor(DTChild->getBlock())) + AllSuccs.push_back(DTChild->getBlock()); + + // Sort Successors according to their loop depth or block frequency info. + std::stable_sort( + AllSuccs.begin(), AllSuccs.end(), + [this](const MachineBasicBlock *L, const MachineBasicBlock *R) { + uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0; + uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0; + bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0; + return HasBlockFreq ? LHSFreq < RHSFreq + : LI->getLoopDepth(L) < LI->getLoopDepth(R); + }); + + auto it = AllSuccessors.insert(std::make_pair(MBB, AllSuccs)); + + return it.first->second; +} + +/// FindSuccToSinkTo - Find a successor to sink this instruction to. +MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI, + MachineBasicBlock *MBB, + bool &BreakPHIEdge, + AllSuccsCache &AllSuccessors) { + + assert (MI && "Invalid MachineInstr!"); + assert (MBB && "Invalid MachineBasicBlock!"); + + // Loop over all the operands of the specified instruction. If there is + // anything we can't handle, bail out. + + // SuccToSinkTo - This is the successor to sink this instruction to, once we + // decide. + MachineBasicBlock *SuccToSinkTo = nullptr; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; // Ignore non-register operands. + + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI->isConstantPhysReg(Reg, *MBB->getParent())) + return nullptr; + } else if (!MO.isDead()) { + // A def that isn't dead. We can't move it. + return nullptr; + } + } else { + // Virtual register uses are always safe to sink. + if (MO.isUse()) continue; + + // If it's not safe to move defs of the register class, then abort. + if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg))) + return nullptr; + + // Virtual register defs can only be sunk if all their uses are in blocks + // dominated by one of the successors. + if (SuccToSinkTo) { + // If a previous operand picked a block to sink to, then this operand + // must be sinkable to the same block. + bool LocalUse = false; + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, MBB, + BreakPHIEdge, LocalUse)) + return nullptr; + + continue; + } + + // Otherwise, we should look at all the successors and decide which one + // we should sink to. If we have reliable block frequency information + // (frequency != 0) available, give successors with smaller frequencies + // higher priority, otherwise prioritize smaller loop depths. 
+ for (MachineBasicBlock *SuccBlock : + GetAllSortedSuccessors(MI, MBB, AllSuccessors)) { + bool LocalUse = false; + if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB, + BreakPHIEdge, LocalUse)) { + SuccToSinkTo = SuccBlock; + break; + } + if (LocalUse) + // Def is used locally, it's never safe to move this def. + return nullptr; + } + + // If we couldn't find a block to sink to, ignore this instruction. + if (!SuccToSinkTo) + return nullptr; + if (!isProfitableToSinkTo(Reg, MI, MBB, SuccToSinkTo, AllSuccessors)) + return nullptr; + } + } + + // It is not possible to sink an instruction into its own block. This can + // happen with loops. + if (MBB == SuccToSinkTo) + return nullptr; + + // It's not safe to sink instructions to EH landing pad. Control flow into + // landing pad is implicitly defined. + if (SuccToSinkTo && SuccToSinkTo->isEHPad()) + return nullptr; + + return SuccToSinkTo; +} + +/// SinkInstruction - Determine whether it is safe to sink the specified machine +/// instruction out of its current block into a successor. +bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore, + AllSuccsCache &AllSuccessors) { + // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to + // be close to the source to make it easier to coalesce. + if (AvoidsSinking(MI, MRI)) + return false; + + // Check if it's safe to move the instruction. + if (!MI->isSafeToMove(AA, SawStore)) + return false; + + // Convergent operations may not be made control-dependent on additional + // values. + if (MI->isConvergent()) + return false; + + // FIXME: This should include support for sinking instructions within the + // block they are currently in to shorten the live ranges. We often get + // instructions sunk into the top of a large block, but it would be better to + // also sink them down before their first use in the block. This xform has to + // be careful not to *increase* register pressure though, e.g. sinking + // "x = y + z" down if it kills y and z would increase the live ranges of y + // and z and only shrink the live range of x. + + bool BreakPHIEdge = false; + MachineBasicBlock *ParentBlock = MI->getParent(); + MachineBasicBlock *SuccToSinkTo = + FindSuccToSinkTo(MI, ParentBlock, BreakPHIEdge, AllSuccessors); + + // If there are no outputs, it must have side-effects. + if (!SuccToSinkTo) + return false; + + + // If the instruction to move defines a dead physical register which is live + // when leaving the basic block, don't move it because it could turn into a + // "zombie" define of that preg. E.g., EFLAGS. (<rdar://problem/8030636>) + for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) { + const MachineOperand &MO = MI->getOperand(I); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (SuccToSinkTo->isLiveIn(Reg)) + return false; + } + + DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo); + + // If the block has multiple predecessors, this is a critical edge. + // Decide if we can sink along it or need to break the edge. + if (SuccToSinkTo->pred_size() > 1) { + // We cannot sink a load across a critical edge - there may be stores in + // other code paths. + bool TryBreak = false; + bool store = true; + if (!MI->isSafeToMove(AA, store)) { + DEBUG(dbgs() << " *** NOTE: Won't sink load along critical edge.\n"); + TryBreak = true; + } + + // We don't want to sink across a critical edge if we don't dominate the + // successor. 
We could be introducing calculations to new code paths. + if (!TryBreak && !DT->dominates(ParentBlock, SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Critical edge found\n"); + TryBreak = true; + } + + // Don't sink instructions into a loop. + if (!TryBreak && LI->isLoopHeader(SuccToSinkTo)) { + DEBUG(dbgs() << " *** NOTE: Loop header found\n"); + TryBreak = true; + } + + // Otherwise we are OK with sinking along a critical edge. + if (!TryBreak) + DEBUG(dbgs() << "Sinking along critical edge.\n"); + else { + // Mark this edge as to be split. + // If the edge can actually be split, the next iteration of the main loop + // will sink MI in the newly created block. + bool Status = + PostponeSplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge); + if (!Status) + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); + // The instruction will not be sunk this time. + return false; + } + } + + if (BreakPHIEdge) { + // BreakPHIEdge is true if all the uses are in the successor MBB being + // sunken into and they are all PHI nodes. In this case, machine-sink must + // break the critical edge first. + bool Status = PostponeSplitCriticalEdge(MI, ParentBlock, + SuccToSinkTo, BreakPHIEdge); + if (!Status) + DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to " + "break critical edge\n"); + // The instruction will not be sunk this time. + return false; + } + + // Determine where to insert into. Skip phi nodes. + MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin(); + while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI()) + ++InsertPos; + + // collect matching debug values. + SmallVector<MachineInstr *, 2> DbgValuesToSink; + collectDebugValues(MI, DbgValuesToSink); + + // Move the instruction. + SuccToSinkTo->splice(InsertPos, ParentBlock, MI, + ++MachineBasicBlock::iterator(MI)); + + // Move debug values. + for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(), + DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) { + MachineInstr *DbgMI = *DBI; + SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI, + ++MachineBasicBlock::iterator(DbgMI)); + } + + // Conservatively, clear any kill flags, since it's possible that they are no + // longer correct. + // Note that we have to clear the kill flags for any register this instruction + // uses as we may sink over another instruction which currently kills the + // used registers. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.isUse()) + RegsToClearKillFlags.set(MO.getReg()); // Remember to clear kill flags. + } + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp new file mode 100644 index 0000000..f7edacd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp @@ -0,0 +1,1325 @@ +//===- lib/CodeGen/MachineTraceMetrics.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "machine-trace-metrics" + +char MachineTraceMetrics::ID = 0; +char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID; + +INITIALIZE_PASS_BEGIN(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(MachineTraceMetrics, + "machine-trace-metrics", "Machine Trace Metrics", false, true) + +MachineTraceMetrics::MachineTraceMetrics() + : MachineFunctionPass(ID), MF(nullptr), TII(nullptr), TRI(nullptr), + MRI(nullptr), Loops(nullptr) { + std::fill(std::begin(Ensembles), std::end(Ensembles), nullptr); +} + +void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetSubtargetInfo &ST = MF->getSubtarget(); + TII = ST.getInstrInfo(); + TRI = ST.getRegisterInfo(); + MRI = &MF->getRegInfo(); + Loops = &getAnalysis<MachineLoopInfo>(); + SchedModel.init(ST.getSchedModel(), &ST, TII); + BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); + return false; +} + +void MachineTraceMetrics::releaseMemory() { + MF = nullptr; + BlockInfo.clear(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) { + delete Ensembles[i]; + Ensembles[i] = nullptr; + } +} + +//===----------------------------------------------------------------------===// +// Fixed block information +//===----------------------------------------------------------------------===// +// +// The number of instructions in a basic block and the CPU resources used by +// those instructions don't depend on any given trace strategy. + +/// Compute the resource usage in basic block MBB. +const MachineTraceMetrics::FixedBlockInfo* +MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { + assert(MBB && "No basic block"); + FixedBlockInfo *FBI = &BlockInfo[MBB->getNumber()]; + if (FBI->hasResources()) + return FBI; + + // Compute resource usage in the block. + FBI->HasCalls = false; + unsigned InstrCount = 0; + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector<unsigned, 32> PRCycles(PRKinds); + + for (const auto &MI : *MBB) { + if (MI.isTransient()) + continue; + ++InstrCount; + if (MI.isCall()) + FBI->HasCalls = true; + + // Count processor resources used. 
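+    // Raw resource cycles are accumulated here; they are scaled by the
+    // per-resource factor below so that different kinds are comparable.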
+ if (!SchedModel.hasInstrSchedModel()) + continue; + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(&MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } + } + FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. + unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + + return FBI; +} + +ArrayRef<unsigned> +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); + return makeArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, PRKinds); +} + + +//===----------------------------------------------------------------------===// +// Ensemble utility functions +//===----------------------------------------------------------------------===// + +MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) + : MTM(*ct) { + BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); +} + +// Virtual destructor serves as an anchor. +MachineTraceMetrics::Ensemble::~Ensemble() {} + +const MachineLoop* +MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { + return MTM.Loops->getLoopFor(MBB); +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace above MBB. +void MachineTraceMetrics::Ensemble:: +computeDepthResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; + + // Compute resources from trace above. The top block is simple. + if (!TBI->Pred) { + TBI->InstrDepth = 0; + TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); + return; + } + + // Compute from the block above. A post-order traversal ensures the + // predecessor is always computed first. + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; + assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); + const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); + TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; + TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef<unsigned> PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef<unsigned> PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; +} + +// Update resource-related information in the TraceBlockInfo for MBB. +// Only update resources related to the trace below MBB. 
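+// This is the mirror image of computeDepthResources() above, with one
+// difference: heights include the current block, so
+// InstrHeight(MBB) = InstrCount(MBB) + InstrHeight(Succ), and each
+// per-resource height adds this block's own cycles to the successor's height.
+// Depths only count the blocks strictly above MBB in the trace.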
+void MachineTraceMetrics::Ensemble:: +computeHeightResources(const MachineBasicBlock *MBB) { + TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; + + // Compute resources for the current block. + TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + ArrayRef<unsigned> PRCycles = MTM.getProcResourceCycles(MBB->getNumber()); + + // The trace tail is done. + if (!TBI->Succ) { + TBI->Tail = MBB->getNumber(); + std::copy(PRCycles.begin(), PRCycles.end(), + ProcResourceHeights.begin() + PROffset); + return; + } + + // Compute from the block below. A post-order traversal ensures the + // predecessor is always computed first. + unsigned SuccNum = TBI->Succ->getNumber(); + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum]; + assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); + TBI->InstrHeight += SuccTBI->InstrHeight; + TBI->Tail = SuccTBI->Tail; + + // Compute per-resource heights. + ArrayRef<unsigned> SuccPRHeights = getProcResourceHeights(SuccNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; +} + +// Check if depth resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getDepthResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidDepth() ? TBI : nullptr; +} + +// Check if height resources for MBB are valid and return the TBI. +// Return NULL if the resources have been invalidated. +const MachineTraceMetrics::TraceBlockInfo* +MachineTraceMetrics::Ensemble:: +getHeightResources(const MachineBasicBlock *MBB) const { + const TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + return TBI->hasValidHeight() ? TBI : nullptr; +} + +/// Get an array of processor resource depths for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by all blocks preceding MBB in its trace. It does not include instructions +/// in MBB. +/// +/// Compare TraceBlockInfo::InstrDepth. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceDepths(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); + return makeArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, PRKinds); +} + +/// Get an array of processor resource heights for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by this block and all blocks following it in its trace. +/// +/// Compare TraceBlockInfo::InstrHeight. +ArrayRef<unsigned> +MachineTraceMetrics::Ensemble:: +getProcResourceHeights(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); + return makeArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, PRKinds); +} + +//===----------------------------------------------------------------------===// +// Trace Selection Strategies +//===----------------------------------------------------------------------===// +// +// A trace selection strategy is implemented as a sub-class of Ensemble. The +// trace through a block B is computed by two DFS traversals of the CFG +// starting from B. One upwards, and one downwards. 
During the upwards DFS, +// pickTracePred() is called on the post-ordered blocks. During the downwards +// DFS, pickTraceSucc() is called in a post-order. +// + +// We never allow traces that leave loops, but we do allow traces to enter +// nested loops. We also never allow traces to contain back-edges. +// +// This means that a loop header can never appear above the center block of a +// trace, except as the trace head. Below the center block, loop exiting edges +// are banned. +// +// Return true if an edge from the From loop to the To loop is leaving a loop. +// Either of To and From can be null. +static bool isExitingLoop(const MachineLoop *From, const MachineLoop *To) { + return From && !From->contains(To); +} + +// MinInstrCountEnsemble - Pick the trace that executes the least number of +// instructions. +namespace { +class MinInstrCountEnsemble : public MachineTraceMetrics::Ensemble { + const char *getName() const override { return "MinInstr"; } + const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) override; + const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) override; + +public: + MinInstrCountEnsemble(MachineTraceMetrics *mtm) + : MachineTraceMetrics::Ensemble(mtm) {} +}; +} + +// Select the preferred predecessor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTracePred(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return nullptr; + const MachineLoop *CurLoop = getLoopFor(MBB); + // Don't leave loops, and never follow back-edges. + if (CurLoop && MBB == CurLoop->getHeader()) + return nullptr; + unsigned CurCount = MTM.getResources(MBB)->InstrCount; + const MachineBasicBlock *Best = nullptr; + unsigned BestDepth = 0; + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + const MachineTraceMetrics::TraceBlockInfo *PredTBI = + getDepthResources(Pred); + // Ignore cycles that aren't natural loops. + if (!PredTBI) + continue; + // Pick the predecessor that would give this block the smallest InstrDepth. + unsigned Depth = PredTBI->InstrDepth + CurCount; + if (!Best || Depth < BestDepth) + Best = Pred, BestDepth = Depth; + } + return Best; +} + +// Select the preferred successor for MBB. +const MachineBasicBlock* +MinInstrCountEnsemble::pickTraceSucc(const MachineBasicBlock *MBB) { + if (MBB->pred_empty()) + return nullptr; + const MachineLoop *CurLoop = getLoopFor(MBB); + const MachineBasicBlock *Best = nullptr; + unsigned BestHeight = 0; + for (const MachineBasicBlock *Succ : MBB->successors()) { + // Don't consider back-edges. + if (CurLoop && Succ == CurLoop->getHeader()) + continue; + // Don't consider successors exiting CurLoop. + if (isExitingLoop(CurLoop, getLoopFor(Succ))) + continue; + const MachineTraceMetrics::TraceBlockInfo *SuccTBI = + getHeightResources(Succ); + // Ignore cycles that aren't natural loops. + if (!SuccTBI) + continue; + // Pick the successor that would give this block the smallest InstrHeight. + unsigned Height = SuccTBI->InstrHeight; + if (!Best || Height < BestHeight) + Best = Succ, BestHeight = Height; + } + return Best; +} + +// Get an Ensemble sub-class for the requested trace strategy. +MachineTraceMetrics::Ensemble * +MachineTraceMetrics::getEnsemble(MachineTraceMetrics::Strategy strategy) { + assert(strategy < TS_NumStrategies && "Invalid trace strategy enum"); + Ensemble *&E = Ensembles[strategy]; + if (E) + return E; + + // Allocate new Ensemble on demand. 
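+  // The new Ensemble is cached in Ensembles[strategy] and owned by this pass;
+  // it is deleted in releaseMemory(). Repeated queries with the same strategy
+  // therefore share one Ensemble and its cached traces.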
+ switch (strategy) { + case TS_MinInstrCount: return (E = new MinInstrCountEnsemble(this)); + default: llvm_unreachable("Invalid trace strategy enum"); + } +} + +void MachineTraceMetrics::invalidate(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Invalidate traces through BB#" << MBB->getNumber() << '\n'); + BlockInfo[MBB->getNumber()].invalidate(); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->invalidate(MBB); +} + +void MachineTraceMetrics::verifyAnalysis() const { + if (!MF) + return; +#ifndef NDEBUG + assert(BlockInfo.size() == MF->getNumBlockIDs() && "Outdated BlockInfo size"); + for (unsigned i = 0; i != TS_NumStrategies; ++i) + if (Ensembles[i]) + Ensembles[i]->verify(); +#endif +} + +//===----------------------------------------------------------------------===// +// Trace building +//===----------------------------------------------------------------------===// +// +// Traces are built by two CFG traversals. To avoid recomputing too much, use a +// set abstraction that confines the search to the current loop, and doesn't +// revisit blocks. + +namespace { +struct LoopBounds { + MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> Blocks; + SmallPtrSet<const MachineBasicBlock*, 8> Visited; + const MachineLoopInfo *Loops; + bool Downward; + LoopBounds(MutableArrayRef<MachineTraceMetrics::TraceBlockInfo> blocks, + const MachineLoopInfo *loops) + : Blocks(blocks), Loops(loops), Downward(false) {} +}; +} + +// Specialize po_iterator_storage in order to prune the post-order traversal so +// it is limited to the current loop and doesn't traverse the loop back edges. +namespace llvm { +template<> +class po_iterator_storage<LoopBounds, true> { + LoopBounds &LB; +public: + po_iterator_storage(LoopBounds &lb) : LB(lb) {} + void finishPostorder(const MachineBasicBlock*) {} + + bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) { + // Skip already visited To blocks. + MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()]; + if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth()) + return false; + // From is null once when To is the trace center block. + if (From) { + if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) { + // Don't follow backedges, don't leave FromLoop when going upwards. + if ((LB.Downward ? To : From) == FromLoop->getHeader()) + return false; + // Don't leave FromLoop. + if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To))) + return false; + } + } + // To is a new block. Mark the block as visited in case the CFG has cycles + // that MachineLoopInfo didn't recognize as a natural loop. + return LB.Visited.insert(To).second; + } +}; +} + +/// Compute the trace through MBB. +void MachineTraceMetrics::Ensemble::computeTrace(const MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Computing " << getName() << " trace through BB#" + << MBB->getNumber() << '\n'); + // Set up loop bounds for the backwards post-order traversal. + LoopBounds Bounds(BlockInfo, MTM.Loops); + + // Run an upwards post-order search for the trace start. + Bounds.Downward = false; + Bounds.Visited.clear(); + for (auto I : inverse_post_order_ext(MBB, Bounds)) { + DEBUG(dbgs() << " pred for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the predecessors have been visited, pick the preferred one. 
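+    // The inverse post-order guarantees that every predecessor visited by this
+    // search has already been processed, so its depth resources are valid at
+    // this point; predecessors without valid depth resources are simply
+    // skipped by pickTracePred() via getDepthResources() returning null.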
+ TBI.Pred = pickTracePred(I); + DEBUG({ + if (TBI.Pred) + dbgs() << "BB#" << TBI.Pred->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leading to I is now known, compute the depth resources. + computeDepthResources(I); + } + + // Run a downwards post-order search for the trace end. + Bounds.Downward = true; + Bounds.Visited.clear(); + for (auto I : post_order_ext(MBB, Bounds)) { + DEBUG(dbgs() << " succ for BB#" << I->getNumber() << ": "); + TraceBlockInfo &TBI = BlockInfo[I->getNumber()]; + // All the successors have been visited, pick the preferred one. + TBI.Succ = pickTraceSucc(I); + DEBUG({ + if (TBI.Succ) + dbgs() << "BB#" << TBI.Succ->getNumber() << '\n'; + else + dbgs() << "null\n"; + }); + // The trace leaving I is now known, compute the height resources. + computeHeightResources(I); + } +} + +/// Invalidate traces through BadMBB. +void +MachineTraceMetrics::Ensemble::invalidate(const MachineBasicBlock *BadMBB) { + SmallVector<const MachineBasicBlock*, 16> WorkList; + TraceBlockInfo &BadTBI = BlockInfo[BadMBB->getNumber()]; + + // Invalidate height resources of blocks above MBB. + if (BadTBI.hasValidHeight()) { + BadTBI.invalidateHeight(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " height.\n"); + // Find any MBB predecessors that have MBB as their preferred successor. + // They are the only ones that need to be invalidated. + for (const MachineBasicBlock *Pred : MBB->predecessors()) { + TraceBlockInfo &TBI = BlockInfo[Pred->getNumber()]; + if (!TBI.hasValidHeight()) + continue; + if (TBI.Succ == MBB) { + TBI.invalidateHeight(); + WorkList.push_back(Pred); + continue; + } + // Verify that TBI.Succ is actually a *I successor. + assert((!TBI.Succ || Pred->isSuccessor(TBI.Succ)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Invalidate depth resources of blocks below MBB. + if (BadTBI.hasValidDepth()) { + BadTBI.invalidateDepth(); + WorkList.push_back(BadMBB); + do { + const MachineBasicBlock *MBB = WorkList.pop_back_val(); + DEBUG(dbgs() << "Invalidate BB#" << MBB->getNumber() << ' ' << getName() + << " depth.\n"); + // Find any MBB successors that have MBB as their preferred predecessor. + // They are the only ones that need to be invalidated. + for (const MachineBasicBlock *Succ : MBB->successors()) { + TraceBlockInfo &TBI = BlockInfo[Succ->getNumber()]; + if (!TBI.hasValidDepth()) + continue; + if (TBI.Pred == MBB) { + TBI.invalidateDepth(); + WorkList.push_back(Succ); + continue; + } + // Verify that TBI.Pred is actually a *I predecessor. + assert((!TBI.Pred || Succ->isPredecessor(TBI.Pred)) && "CFG changed"); + } + } while (!WorkList.empty()); + } + + // Clear any per-instruction data. We only have to do this for BadMBB itself + // because the instructions in that block may change. Other blocks may be + // invalidated, but their instructions will stay the same, so there is no + // need to erase the Cycle entries. They will be overwritten when we + // recompute. 
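+  // Cycles is keyed by MachineInstr pointer, so the entries for BadMBB's
+  // instructions are erased here before those instructions can be deleted or
+  // rewritten by whoever invalidated the block.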
+ for (const auto &I : *BadMBB) + Cycles.erase(&I); +} + +void MachineTraceMetrics::Ensemble::verify() const { +#ifndef NDEBUG + assert(BlockInfo.size() == MTM.MF->getNumBlockIDs() && + "Outdated BlockInfo size"); + for (unsigned Num = 0, e = BlockInfo.size(); Num != e; ++Num) { + const TraceBlockInfo &TBI = BlockInfo[Num]; + if (TBI.hasValidDepth() && TBI.Pred) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isPredecessor(TBI.Pred) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Pred->getNumber()].hasValidDepth() && + "Trace is broken, depth should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + assert(!(Loop && MBB == Loop->getHeader()) && "Trace contains backedge"); + } + if (TBI.hasValidHeight() && TBI.Succ) { + const MachineBasicBlock *MBB = MTM.MF->getBlockNumbered(Num); + assert(MBB->isSuccessor(TBI.Succ) && "CFG doesn't match trace"); + assert(BlockInfo[TBI.Succ->getNumber()].hasValidHeight() && + "Trace is broken, height should have been invalidated."); + const MachineLoop *Loop = getLoopFor(MBB); + const MachineLoop *SuccLoop = getLoopFor(TBI.Succ); + assert(!(Loop && Loop == SuccLoop && TBI.Succ == Loop->getHeader()) && + "Trace contains backedge"); + } + } +#endif +} + +//===----------------------------------------------------------------------===// +// Data Dependencies +//===----------------------------------------------------------------------===// +// +// Compute the depth and height of each instruction based on data dependencies +// and instruction latencies. These cycle numbers assume that the CPU can issue +// an infinite number of instructions per cycle as long as their dependencies +// are ready. + +// A data dependency is represented as a defining MI and operand numbers on the +// defining and using MI. +namespace { +struct DataDep { + const MachineInstr *DefMI; + unsigned DefOp; + unsigned UseOp; + + DataDep(const MachineInstr *DefMI, unsigned DefOp, unsigned UseOp) + : DefMI(DefMI), DefOp(DefOp), UseOp(UseOp) {} + + /// Create a DataDep from an SSA form virtual register. + DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp) + : UseOp(UseOp) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg)); + MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg); + assert(!DefI.atEnd() && "Register has no defs"); + DefMI = DefI->getParent(); + DefOp = DefI.getOperandNo(); + assert((++DefI).atEnd() && "Register has multiple defs"); + } +}; +} + +// Get the input data dependencies that must be ready before UseMI can issue. +// Return true if UseMI has any physreg operands. +static bool getDataDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineRegisterInfo *MRI) { + // Debug values should not be included in any calculations. + if (UseMI->isDebugValue()) + return false; + + bool HasPhysRegs = false; + for (MachineInstr::const_mop_iterator I = UseMI->operands_begin(), + E = UseMI->operands_end(); I != E; ++I) { + const MachineOperand &MO = *I; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + HasPhysRegs = true; + continue; + } + // Collect virtual register reads. + if (MO.readsReg()) + Deps.push_back(DataDep(MRI, Reg, UseMI->getOperandNo(I))); + } + return HasPhysRegs; +} + +// Get the input data dependencies of a PHI instruction, using Pred as the +// preferred predecessor. +// This will add at most one dependency to Deps. 
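+// For example, given
+//   %vreg2 = PHI %vreg0, <BB#1>, %vreg1, <BB#2>
+// with Pred == BB#2, only the dependency on the def of %vreg1 is recorded,
+// since the %vreg0 value cannot reach this PHI along the chosen trace.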
+static void getPHIDeps(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + const MachineBasicBlock *Pred, + const MachineRegisterInfo *MRI) { + // No predecessor at the beginning of a trace. Ignore dependencies. + if (!Pred) + return; + assert(UseMI->isPHI() && UseMI->getNumOperands() % 2 && "Bad PHI"); + for (unsigned i = 1; i != UseMI->getNumOperands(); i += 2) { + if (UseMI->getOperand(i + 1).getMBB() == Pred) { + unsigned Reg = UseMI->getOperand(i).getReg(); + Deps.push_back(DataDep(MRI, Reg, i)); + return; + } + } +} + +// Keep track of physreg data dependencies by recording each live register unit. +// Associate each regunit with an instruction operand. Depending on the +// direction instructions are scanned, it could be the operand that defined the +// regunit, or the highest operand to read the regunit. +namespace { +struct LiveRegUnit { + unsigned RegUnit; + unsigned Cycle; + const MachineInstr *MI; + unsigned Op; + + unsigned getSparseSetIndex() const { return RegUnit; } + + LiveRegUnit(unsigned RU) : RegUnit(RU), Cycle(0), MI(nullptr), Op(0) {} +}; +} + +// Identify physreg dependencies for UseMI, and update the live regunit +// tracking set when scanning instructions downwards. +static void updatePhysDepsDownwards(const MachineInstr *UseMI, + SmallVectorImpl<DataDep> &Deps, + SparseSet<LiveRegUnit> &RegUnits, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> Kills; + SmallVector<unsigned, 8> LiveDefOps; + + for (MachineInstr::const_mop_iterator MI = UseMI->operands_begin(), + ME = UseMI->operands_end(); MI != ME; ++MI) { + const MachineOperand &MO = *MI; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + // Track live defs and kills for updating RegUnits. + if (MO.isDef()) { + if (MO.isDead()) + Kills.push_back(Reg); + else + LiveDefOps.push_back(UseMI->getOperandNo(MI)); + } else if (MO.isKill()) + Kills.push_back(Reg); + // Identify dependencies. + if (!MO.readsReg()) + continue; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + Deps.push_back(DataDep(I->MI, I->Op, UseMI->getOperandNo(MI))); + break; + } + } + + // Update RegUnits to reflect live registers after UseMI. + // First kills. + for (unsigned Kill : Kills) + for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) + RegUnits.erase(*Units); + + // Second, live defs. + for (unsigned DefOp : LiveDefOps) { + for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); + Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + LRU.MI = UseMI; + LRU.Op = DefOp; + } + } +} + +/// The length of the critical path through a trace is the maximum of two path +/// lengths: +/// +/// 1. The maximum height+depth over all instructions in the trace center block. +/// +/// 2. The longest cross-block dependency chain. For small blocks, it is +/// possible that the critical path through the trace doesn't include any +/// instructions in the block. +/// +/// This function computes the second number from the live-in list of the +/// center block. 
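+///
+/// For example, a live-in virtual register whose defining instruction sits at
+/// depth 5 in the trace above the center block, and which is still needed 3
+/// cycles into the trace below it (Height == 3), contributes a cross-block
+/// chain of 5 + 3 = 8 cycles even if no instruction in the center block lies
+/// on that chain.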
+unsigned MachineTraceMetrics::Ensemble:: +computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { + assert(TBI.HasValidInstrDepths && "Missing depth info"); + assert(TBI.HasValidInstrHeights && "Missing height info"); + unsigned MaxLen = 0; + for (const LiveInReg &LIR : TBI.LiveIns) { + if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) + continue; + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + // Ignore dependencies outside the current trace. + const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; + if (!DefTBI.isUsefulDominator(TBI)) + continue; + unsigned Len = LIR.Height + Cycles[DefMI].Depth; + MaxLen = std::max(MaxLen, Len); + } + return MaxLen; +} + +/// Compute instruction depths for all instructions above or in MBB in its +/// trace. This assumes that the trace through MBB has already been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrDepths(const MachineBasicBlock *MBB) { + // The top of the trace may already be computed, and HasValidInstrDepths + // implies Head->HasValidInstrDepths, so we only need to start from the first + // block in the trace that needs to be recomputed. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidDepth() && "Incomplete trace"); + if (TBI.HasValidInstrDepths) + break; + Stack.push_back(MBB); + MBB = TBI.Pred; + } while (MBB); + + // FIXME: If MBB is non-null at this point, it is the last pre-computed block + // in the trace. We should track any live-out physregs that were defined in + // the trace. This is quite rare in SSA form, typically created by CSE + // hoisting a compare. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // Go through trace blocks in top-down order, stopping after the center block. + SmallVector<DataDep, 8> Deps; + while (!Stack.empty()) { + MBB = Stack.pop_back_val(); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrDepths = true; + TBI.CriticalPath = 0; + + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef<unsigned> PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + + // Also compute the critical path length through MBB when possible. + if (TBI.HasValidInstrHeights) + TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); + + for (const auto &UseMI : *MBB) { + // Collect all data dependencies. + Deps.clear(); + if (UseMI.isPHI()) + getPHIDeps(&UseMI, Deps, TBI.Pred, MTM.MRI); + else if (getDataDeps(&UseMI, Deps, MTM.MRI)) + updatePhysDepsDownwards(&UseMI, Deps, RegUnits, MTM.TRI); + + // Filter and process dependencies, computing the earliest issue cycle. + unsigned Cycle = 0; + for (const DataDep &Dep : Deps) { + const TraceBlockInfo&DepTBI = + BlockInfo[Dep.DefMI->getParent()->getNumber()]; + // Ignore dependencies from outside the current trace. + if (!DepTBI.isUsefulDominator(TBI)) + continue; + assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); + unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. 
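+        // In other words, Depth(UseMI) = max over its deps of
+        // Depth(DefMI) + latency(DefMI -> UseMI). For example, a def issued at
+        // cycle 4 feeding through a 2-cycle latency keeps the use from issuing
+        // before cycle 6.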
+ if (!Dep.DefMI->isTransient()) + DepCycle += MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, &UseMI, Dep.UseOp); + Cycle = std::max(Cycle, DepCycle); + } + // Remember the instruction depth. + InstrCycles &MICycles = Cycles[&UseMI]; + MICycles.Depth = Cycle; + + if (!TBI.HasValidInstrHeights) { + DEBUG(dbgs() << Cycle << '\t' << UseMI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Height); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << UseMI); + } + } +} + +// Identify physreg dependencies for MI when scanning instructions upwards. +// Return the issue height of MI after considering any live regunits. +// Height is the issue height computed from virtual register dependencies alone. +static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height, + SparseSet<LiveRegUnit> &RegUnits, + const TargetSchedModel &SchedModel, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SmallVector<unsigned, 8> ReadOps; + + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + const MachineOperand &MO = *MOI; + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + if (MO.readsReg()) + ReadOps.push_back(MI->getOperandNo(MOI)); + if (!MO.isDef()) + continue; + // This is a def of Reg. Remove corresponding entries from RegUnits, and + // update MI Height to consider the physreg dependencies. + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + SparseSet<LiveRegUnit>::iterator I = RegUnits.find(*Units); + if (I == RegUnits.end()) + continue; + unsigned DepHeight = I->Cycle; + if (!MI->isTransient()) { + // We may not know the UseMI of this dependency, if it came from the + // live-in list. SchedModel can handle a NULL UseMI. + DepHeight += SchedModel + .computeOperandLatency(MI, MI->getOperandNo(MOI), I->MI, I->Op); + } + Height = std::max(Height, DepHeight); + // This regunit is dead above MI. + RegUnits.erase(I); + } + } + + // Now we know the height of MI. Update any regunits read. + for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) { + unsigned Reg = MI->getOperand(ReadOps[i]).getReg(); + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + LiveRegUnit &LRU = RegUnits[*Units]; + // Set the height to the highest reader of the unit. + if (LRU.Cycle <= Height && LRU.MI != MI) { + LRU.Cycle = Height; + LRU.MI = MI; + LRU.Op = ReadOps[i]; + } + } + } + + return Height; +} + + +typedef DenseMap<const MachineInstr *, unsigned> MIHeightMap; + +// Push the height of DefMI upwards if required to match UseMI. +// Return true if this is the first time DefMI was seen. +static bool pushDepHeight(const DataDep &Dep, + const MachineInstr *UseMI, unsigned UseHeight, + MIHeightMap &Heights, + const TargetSchedModel &SchedModel, + const TargetInstrInfo *TII) { + // Adjust height by Dep.DefMI latency. + if (!Dep.DefMI->isTransient()) + UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp, + UseMI, Dep.UseOp); + + // Update Heights[DefMI] to be the maximum height seen. + MIHeightMap::iterator I; + bool New; + std::tie(I, New) = Heights.insert(std::make_pair(Dep.DefMI, UseHeight)); + if (New) + return true; + + // DefMI has been pushed before. Give it the max height. 
+ if (I->second < UseHeight) + I->second = UseHeight; + return false; +} + +/// Assuming that the virtual register defined by DefMI:DefOp was used by +/// Trace.back(), add it to the live-in lists of all the blocks in Trace. Stop +/// when reaching the block that contains DefMI. +void MachineTraceMetrics::Ensemble:: +addLiveIns(const MachineInstr *DefMI, unsigned DefOp, + ArrayRef<const MachineBasicBlock*> Trace) { + assert(!Trace.empty() && "Trace should contain at least one block"); + unsigned Reg = DefMI->getOperand(DefOp).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + const MachineBasicBlock *DefMBB = DefMI->getParent(); + + // Reg is live-in to all blocks in Trace that follow DefMBB. + for (unsigned i = Trace.size(); i; --i) { + const MachineBasicBlock *MBB = Trace[i-1]; + if (MBB == DefMBB) + return; + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + // Just add the register. The height will be updated later. + TBI.LiveIns.push_back(Reg); + } +} + +/// Compute instruction heights in the trace through MBB. This updates MBB and +/// the blocks below it in the trace. It is assumed that the trace has already +/// been computed. +void MachineTraceMetrics::Ensemble:: +computeInstrHeights(const MachineBasicBlock *MBB) { + // The bottom of the trace may already be computed. + // Find the blocks that need updating. + SmallVector<const MachineBasicBlock*, 8> Stack; + do { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + assert(TBI.hasValidHeight() && "Incomplete trace"); + if (TBI.HasValidInstrHeights) + break; + Stack.push_back(MBB); + TBI.LiveIns.clear(); + MBB = TBI.Succ; + } while (MBB); + + // As we move upwards in the trace, keep track of instructions that are + // required by deeper trace instructions. Map MI -> height required so far. + MIHeightMap Heights; + + // For physregs, the def isn't known when we see the use. + // Instead, keep track of the highest use of each regunit. + SparseSet<LiveRegUnit> RegUnits; + RegUnits.setUniverse(MTM.TRI->getNumRegUnits()); + + // If the bottom of the trace was already precomputed, initialize heights + // from its live-in list. + // MBB is the highest precomputed block in the trace. + if (MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + for (LiveInReg &LI : TBI.LiveIns) { + if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) { + // For virtual registers, the def latency is included. + unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)]; + if (Height < LI.Height) + Height = LI.Height; + } else { + // For register units, the def latency is not included because we don't + // know the def yet. + RegUnits[LI.Reg].Cycle = LI.Height; + } + } + } + + // Go through the trace blocks in bottom-up order. + SmallVector<DataDep, 8> Deps; + for (;!Stack.empty(); Stack.pop_back()) { + MBB = Stack.back(); + DEBUG(dbgs() << "Heights for BB#" << MBB->getNumber() << ":\n"); + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + TBI.HasValidInstrHeights = true; + TBI.CriticalPath = 0; + + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef<unsigned> PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + + // Get dependencies from PHIs in the trace successor. 
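+    // A value defined in MBB whose only use is a PHI in the trace successor
+    // still constrains MBB's heights, so those PHI operands are pulled in
+    // first. When MBB ends the trace on a loop back-edge, the header PHIs are
+    // treated as having height 0 to model the loop-carried uses.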
+ const MachineBasicBlock *Succ = TBI.Succ; + // If MBB is the last block in the trace, and it has a back-edge to the + // loop header, get loop-carried dependencies from PHIs in the header. For + // that purpose, pretend that all the loop header PHIs have height 0. + if (!Succ) + if (const MachineLoop *Loop = getLoopFor(MBB)) + if (MBB->isSuccessor(Loop->getHeader())) + Succ = Loop->getHeader(); + + if (Succ) { + for (const auto &PHI : *Succ) { + if (!PHI.isPHI()) + break; + Deps.clear(); + getPHIDeps(&PHI, Deps, MBB, MTM.MRI); + if (!Deps.empty()) { + // Loop header PHI heights are all 0. + unsigned Height = TBI.Succ ? Cycles.lookup(&PHI).Height : 0; + DEBUG(dbgs() << "pred\t" << Height << '\t' << PHI); + if (pushDepHeight(Deps.front(), &PHI, Height, + Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Deps.front().DefMI, Deps.front().DefOp, Stack); + } + } + } + + // Go through the block backwards. + for (MachineBasicBlock::const_iterator BI = MBB->end(), BB = MBB->begin(); + BI != BB;) { + const MachineInstr *MI = --BI; + + // Find the MI height as determined by virtual register uses in the + // trace below. + unsigned Cycle = 0; + MIHeightMap::iterator HeightI = Heights.find(MI); + if (HeightI != Heights.end()) { + Cycle = HeightI->second; + // We won't be seeing any more MI uses. + Heights.erase(HeightI); + } + + // Don't process PHI deps. They depend on the specific predecessor, and + // we'll get them when visiting the predecessor. + Deps.clear(); + bool HasPhysRegs = !MI->isPHI() && getDataDeps(MI, Deps, MTM.MRI); + + // There may also be regunit dependencies to include in the height. + if (HasPhysRegs) + Cycle = updatePhysDepsUpwards(MI, Cycle, RegUnits, + MTM.SchedModel, MTM.TII, MTM.TRI); + + // Update the required height of any virtual registers read by MI. + for (const DataDep &Dep : Deps) + if (pushDepHeight(Dep, MI, Cycle, Heights, MTM.SchedModel, MTM.TII)) + addLiveIns(Dep.DefMI, Dep.DefOp, Stack); + + InstrCycles &MICycles = Cycles[MI]; + MICycles.Height = Cycle; + if (!TBI.HasValidInstrDepths) { + DEBUG(dbgs() << Cycle << '\t' << *MI); + continue; + } + // Update critical path length. + TBI.CriticalPath = std::max(TBI.CriticalPath, Cycle + MICycles.Depth); + DEBUG(dbgs() << TBI.CriticalPath << '\t' << Cycle << '\t' << *MI); + } + + // Update virtual live-in heights. They were added by addLiveIns() with a 0 + // height because the final height isn't known until now. + DEBUG(dbgs() << "BB#" << MBB->getNumber() << " Live-ins:"); + for (LiveInReg &LIR : TBI.LiveIns) { + const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); + LIR.Height = Heights.lookup(DefMI); + DEBUG(dbgs() << ' ' << PrintReg(LIR.Reg) << '@' << LIR.Height); + } + + // Transfer the live regunits to the live-in list. + for (SparseSet<LiveRegUnit>::const_iterator + RI = RegUnits.begin(), RE = RegUnits.end(); RI != RE; ++RI) { + TBI.LiveIns.push_back(LiveInReg(RI->RegUnit, RI->Cycle)); + DEBUG(dbgs() << ' ' << PrintRegUnit(RI->RegUnit, MTM.TRI) + << '@' << RI->Cycle); + } + DEBUG(dbgs() << '\n'); + + if (!TBI.HasValidInstrDepths) + continue; + // Add live-ins to the critical path length. 
+ TBI.CriticalPath = std::max(TBI.CriticalPath, + computeCrossBlockCriticalPath(TBI)); + DEBUG(dbgs() << "Critical path: " << TBI.CriticalPath << '\n'); + } +} + +MachineTraceMetrics::Trace +MachineTraceMetrics::Ensemble::getTrace(const MachineBasicBlock *MBB) { + TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; + + if (!TBI.hasValidDepth() || !TBI.hasValidHeight()) + computeTrace(MBB); + if (!TBI.HasValidInstrDepths) + computeInstrDepths(MBB); + if (!TBI.HasValidInstrHeights) + computeInstrHeights(MBB); + + return Trace(*this, TBI); +} + +unsigned +MachineTraceMetrics::Trace::getInstrSlack(const MachineInstr *MI) const { + assert(MI && "Not an instruction."); + assert(getBlockNum() == unsigned(MI->getParent()->getNumber()) && + "MI must be in the trace center block"); + InstrCycles Cyc = getInstrCycles(MI); + return getCriticalPath() - (Cyc.Depth + Cyc.Height); +} + +unsigned +MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { + const MachineBasicBlock *MBB = TE.MTM.MF->getBlockNumbered(getBlockNum()); + SmallVector<DataDep, 1> Deps; + getPHIDeps(PHI, Deps, MBB, TE.MTM.MRI); + assert(Deps.size() == 1 && "PHI doesn't have MBB as a predecessor"); + DataDep &Dep = Deps.front(); + unsigned DepCycle = getInstrCycles(Dep.DefMI).Depth; + // Add latency if DefMI is a real instruction. Transients get latency 0. + if (!Dep.DefMI->isTransient()) + DepCycle += TE.MTM.SchedModel + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); + return DepCycle; +} + +/// When bottom is set include instructions in current block in estimate. +unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. + unsigned PRMax = 0; + ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef<unsigned> PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + /// All instructions before current block + unsigned Instrs = TBI.InstrDepth; + // plus instructions in current block + if (Bottom) + Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +unsigned MachineTraceMetrics::Trace::getResourceLength( + ArrayRef<const MachineBasicBlock *> Extrablocks, + ArrayRef<const MCSchedClassDesc *> ExtraInstrs, + ArrayRef<const MCSchedClassDesc *> RemoveInstrs) const { + // Add up resources above and below the center block. 
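+  // For each processor resource kind this sums the scaled cycles used above
+  // and below the center block, plus the contribution of any extra blocks and
+  // instructions the caller wants modelled (minus removed ones). The result is
+  // the larger of the worst-case resource, converted to cycles, and the total
+  // instruction count divided by the issue width.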
+ ArrayRef<unsigned> PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef<unsigned> PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + + // Capture computing cycles from extra instructions + auto extraCycles = [this](ArrayRef<const MCSchedClassDesc *> Instrs, + unsigned ResourceIdx) + ->unsigned { + unsigned Cycles = 0; + for (const MCSchedClassDesc *SC : Instrs) { + if (!SC->isValid()) + continue; + for (TargetSchedModel::ProcResIter + PI = TE.MTM.SchedModel.getWriteProcResBegin(SC), + PE = TE.MTM.SchedModel.getWriteProcResEnd(SC); + PI != PE; ++PI) { + if (PI->ProcResourceIdx != ResourceIdx) + continue; + Cycles += + (PI->Cycles * TE.MTM.SchedModel.getResourceFactor(ResourceIdx)); + } + } + return Cycles; + }; + + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (const MachineBasicBlock *MBB : Extrablocks) + PRCycles += TE.MTM.getProcResourceCycles(MBB->getNumber())[K]; + PRCycles += extraCycles(ExtraInstrs, K); + PRCycles -= extraCycles(RemoveInstrs, K); + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + + // Instrs: #instructions in current trace outside current block. + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; + // Add instruction count from the extra blocks. + for (const MachineBasicBlock *MBB : Extrablocks) + Instrs += TE.MTM.getResources(MBB)->InstrCount; + Instrs += ExtraInstrs.size(); + Instrs -= RemoveInstrs.size(); + if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) + Instrs /= IW; + // Assume issue width 1 without a schedule model. + return std::max(Instrs, PRMax); +} + +bool MachineTraceMetrics::Trace::isDepInTrace(const MachineInstr *DefMI, + const MachineInstr *UseMI) const { + if (DefMI->getParent() == UseMI->getParent()) + return true; + + const TraceBlockInfo &DepTBI = TE.BlockInfo[DefMI->getParent()->getNumber()]; + const TraceBlockInfo &TBI = TE.BlockInfo[UseMI->getParent()->getNumber()]; + + return DepTBI.isUsefulDominator(TBI); +} + +void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { + OS << getName() << " ensemble:\n"; + for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) { + OS << " BB#" << i << '\t'; + BlockInfo[i].print(OS); + OS << '\n'; + } +} + +void MachineTraceMetrics::TraceBlockInfo::print(raw_ostream &OS) const { + if (hasValidDepth()) { + OS << "depth=" << InstrDepth; + if (Pred) + OS << " pred=BB#" << Pred->getNumber(); + else + OS << " pred=null"; + OS << " head=BB#" << Head; + if (HasValidInstrDepths) + OS << " +instrs"; + } else + OS << "depth invalid"; + OS << ", "; + if (hasValidHeight()) { + OS << "height=" << InstrHeight; + if (Succ) + OS << " succ=BB#" << Succ->getNumber(); + else + OS << " succ=null"; + OS << " tail=BB#" << Tail; + if (HasValidInstrHeights) + OS << " +instrs"; + } else + OS << "height invalid"; + if (HasValidInstrDepths && HasValidInstrHeights) + OS << ", crit=" << CriticalPath; +} + +void MachineTraceMetrics::Trace::print(raw_ostream &OS) const { + unsigned MBBNum = &TBI - &TE.BlockInfo[0]; + + OS << TE.getName() << " trace BB#" << TBI.Head << " --> BB#" << MBBNum + << " --> BB#" << TBI.Tail << ':'; + if (TBI.hasValidHeight() && TBI.hasValidDepth()) + OS << ' ' << getInstrCount() << " instrs."; + if (TBI.HasValidInstrDepths && TBI.HasValidInstrHeights) + OS << ' ' << TBI.CriticalPath << " cycles."; + + const MachineTraceMetrics::TraceBlockInfo *Block = &TBI; + OS << "\nBB#" << MBBNum; + while (Block->hasValidDepth() && Block->Pred) { + unsigned 
Num = Block->Pred->getNumber(); + OS << " <- BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + + Block = &TBI; + OS << "\n "; + while (Block->hasValidHeight() && Block->Succ) { + unsigned Num = Block->Succ->getNumber(); + OS << " -> BB#" << Num; + Block = &TE.BlockInfo[Num]; + } + OS << '\n'; +} diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp new file mode 100644 index 0000000..cdcd8eb --- /dev/null +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -0,0 +1,1878 @@ +//===-- MachineVerifier.cpp - Machine Code Verifier -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Pass to verify generated machine code. The following is checked: +// +// Operand counts: All explicit operands must be present. +// +// Register classes: All physical and virtual register operands must be +// compatible with the register class required by the instruction descriptor. +// +// Register live intervals: Registers must be defined only once, and must be +// defined before use. +// +// The machine code verifier is enabled from LLVMTargetMachine.cpp with the +// command-line option -verify-machineinstrs, or by defining the environment +// variable LLVM_VERIFY_MACHINEINSTRS to the name of a file that will receive +// the verifier errors. +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +namespace { + struct MachineVerifier { + + MachineVerifier(Pass *pass, const char *b) : + PASS(pass), + Banner(b) + {} + + bool runOnMachineFunction(MachineFunction &MF); + + Pass *const PASS; + const char *Banner; + const MachineFunction *MF; + const TargetMachine *TM; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + + unsigned foundErrors; + + typedef SmallVector<unsigned, 16> RegVector; + typedef SmallVector<const uint32_t*, 4> RegMaskVector; + typedef DenseSet<unsigned> RegSet; + typedef DenseMap<unsigned, const MachineInstr*> RegMap; + typedef SmallPtrSet<const MachineBasicBlock*, 8> BlockSet; + + const MachineInstr *FirstTerminator; + BlockSet FunctionBlocks; + + BitVector regsReserved; + RegSet regsLive; + RegVector regsDefined, regsDead, regsKilled; + RegMaskVector regMasks; + RegSet regsLiveInButUnused; + + SlotIndex lastIndex; + + // Add Reg and any 
sub-registers to RV + void addRegWithSubRegs(RegVector &RV, unsigned Reg) { + RV.push_back(Reg); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + RV.push_back(*SubRegs); + } + + struct BBInfo { + // Is this MBB reachable from the MF entry point? + bool reachable; + + // Vregs that must be live in because they are used without being + // defined. Map value is the user. + RegMap vregsLiveIn; + + // Regs killed in MBB. They may be defined again, and will then be in both + // regsKilled and regsLiveOut. + RegSet regsKilled; + + // Regs defined in MBB and live out. Note that vregs passing through may + // be live out without being mentioned here. + RegSet regsLiveOut; + + // Vregs that pass through MBB untouched. This set is disjoint from + // regsKilled and regsLiveOut. + RegSet vregsPassed; + + // Vregs that must pass through MBB because they are needed by a successor + // block. This set is disjoint from regsLiveOut. + RegSet vregsRequired; + + // Set versions of block's predecessor and successor lists. + BlockSet Preds, Succs; + + BBInfo() : reachable(false) {} + + // Add register to vregsPassed if it belongs there. Return true if + // anything changed. + bool addPassed(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsKilled.count(Reg) || regsLiveOut.count(Reg)) + return false; + return vregsPassed.insert(Reg).second; + } + + // Same for a full set. + bool addPassed(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addPassed(*I)) + changed = true; + return changed; + } + + // Add register to vregsRequired if it belongs there. Return true if + // anything changed. + bool addRequired(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsLiveOut.count(Reg)) + return false; + return vregsRequired.insert(Reg).second; + } + + // Same for a full set. + bool addRequired(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addRequired(*I)) + changed = true; + return changed; + } + + // Same for a full map. + bool addRequired(const RegMap &RM) { + bool changed = false; + for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) + if (addRequired(I->first)) + changed = true; + return changed; + } + + // Live-out registers are either in regsLiveOut or vregsPassed. + bool isLiveOut(unsigned Reg) const { + return regsLiveOut.count(Reg) || vregsPassed.count(Reg); + } + }; + + // Extra register info per MBB. 
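+    // The BBInfo sets stored in this map feed two fixed-point computations
+    // declared below: calcRegsPassed() propagates vregsPassed forwards through
+    // the CFG and calcRegsRequired() propagates vregsRequired backwards, which
+    // is how cross-block "defined before use" checking is done.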
+ DenseMap<const MachineBasicBlock*, BBInfo> MBBInfoMap; + + bool isReserved(unsigned Reg) { + return Reg < regsReserved.size() && regsReserved.test(Reg); + } + + bool isAllocatable(unsigned Reg) { + return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg); + } + + // Analysis information if available + LiveVariables *LiveVars; + LiveIntervals *LiveInts; + LiveStacks *LiveStks; + SlotIndexes *Indexes; + + void visitMachineFunctionBefore(); + void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); + void visitMachineBundleBefore(const MachineInstr *MI); + void visitMachineInstrBefore(const MachineInstr *MI); + void visitMachineOperand(const MachineOperand *MO, unsigned MONum); + void visitMachineInstrAfter(const MachineInstr *MI); + void visitMachineBundleAfter(const MachineInstr *MI); + void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB); + void visitMachineFunctionAfter(); + + template <typename T> void report(const char *msg, ilist_iterator<T> I) { + report(msg, &*I); + } + void report(const char *msg, const MachineFunction *MF); + void report(const char *msg, const MachineBasicBlock *MBB); + void report(const char *msg, const MachineInstr *MI); + void report(const char *msg, const MachineOperand *MO, unsigned MONum); + + void report_context(const LiveInterval &LI) const; + void report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const; + void report_context(const LiveRange::Segment &S) const; + void report_context(const VNInfo &VNI) const; + + void verifyInlineAsm(const MachineInstr *MI); + + void checkLiveness(const MachineOperand *MO, unsigned MONum); + void markReachable(const MachineBasicBlock *MBB); + void calcRegsPassed(); + void checkPHIOps(const MachineBasicBlock *MBB); + + void calcRegsRequired(); + void verifyLiveVariables(); + void verifyLiveIntervals(); + void verifyLiveInterval(const LiveInterval&); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned, + unsigned); + void verifyLiveRangeSegment(const LiveRange&, + const LiveRange::const_iterator I, unsigned, + unsigned); + void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0); + + void verifyStackFrame(); + + void verifySlotIndexes() const; + }; + + struct MachineVerifierPass : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + const std::string Banner; + + MachineVerifierPass(const std::string &banner = nullptr) + : MachineFunctionPass(ID), Banner(banner) { + initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override { + MF.verify(this, Banner.c_str()); + return false; + } + }; + +} + +char MachineVerifierPass::ID = 0; +INITIALIZE_PASS(MachineVerifierPass, "machineverifier", + "Verify generated machine code", false, false) + +FunctionPass *llvm::createMachineVerifierPass(const std::string &Banner) { + return new MachineVerifierPass(Banner); +} + +void MachineFunction::verify(Pass *p, const char *Banner) const { + MachineVerifier(p, Banner) + .runOnMachineFunction(const_cast<MachineFunction&>(*this)); +} + +void MachineVerifier::verifySlotIndexes() const { + if (Indexes == nullptr) + return; + + // Ensure the IdxMBB list is sorted by slot indexes. 
+ SlotIndex Last; + for (SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(), + E = Indexes->MBBIndexEnd(); I != E; ++I) { + assert(!Last.isValid() || I->first > Last); + Last = I->first; + } +} + +bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { + foundErrors = 0; + + this->MF = &MF; + TM = &MF.getTarget(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + LiveVars = nullptr; + LiveInts = nullptr; + LiveStks = nullptr; + Indexes = nullptr; + if (PASS) { + LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>(); + // We don't want to verify LiveVariables if LiveIntervals is available. + if (!LiveInts) + LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>(); + Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>(); + } + + verifySlotIndexes(); + + visitMachineFunctionBefore(); + for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); + MFI!=MFE; ++MFI) { + visitMachineBasicBlockBefore(&*MFI); + // Keep track of the current bundle header. + const MachineInstr *CurBundle = nullptr; + // Do we expect the next instruction to be part of the same bundle? + bool InBundle = false; + + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { + if (MBBI->getParent() != &*MFI) { + report("Bad instruction parent pointer", MFI); + errs() << "Instruction: " << *MBBI; + continue; + } + + // Check for consistent bundle flags. + if (InBundle && !MBBI->isBundledWithPred()) + report("Missing BundledPred flag, " + "BundledSucc was set on predecessor", + &*MBBI); + if (!InBundle && MBBI->isBundledWithPred()) + report("BundledPred flag is set, " + "but BundledSucc not set on predecessor", + &*MBBI); + + // Is this a bundle header? + if (!MBBI->isInsideBundle()) { + if (CurBundle) + visitMachineBundleAfter(CurBundle); + CurBundle = &*MBBI; + visitMachineBundleBefore(CurBundle); + } else if (!CurBundle) + report("No bundle header", MBBI); + visitMachineInstrBefore(&*MBBI); + for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { + const MachineInstr &MI = *MBBI; + const MachineOperand &Op = MI.getOperand(I); + if (Op.getParent() != &MI) { + // Make sure to use correct addOperand / RemoveOperand / ChangeTo + // functions when replacing operands of a MachineInstr. + report("Instruction has operand with wrong parent set", &MI); + } + + visitMachineOperand(&Op, I); + } + + visitMachineInstrAfter(&*MBBI); + + // Was this the last bundled instruction? + InBundle = MBBI->isBundledWithSucc(); + } + if (CurBundle) + visitMachineBundleAfter(CurBundle); + if (InBundle) + report("BundledSucc flag set on last instruction in block", &MFI->back()); + visitMachineBasicBlockAfter(&*MFI); + } + visitMachineFunctionAfter(); + + if (foundErrors) + report_fatal_error("Found "+Twine(foundErrors)+" machine code errors."); + + // Clean up. 
+ regsLive.clear(); + regsDefined.clear(); + regsDead.clear(); + regsKilled.clear(); + regMasks.clear(); + regsLiveInButUnused.clear(); + MBBInfoMap.clear(); + + return false; // no changes +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF) { + assert(MF); + errs() << '\n'; + if (!foundErrors++) { + if (Banner) + errs() << "# " << Banner << '\n'; + if (LiveInts != nullptr) + LiveInts->print(errs()); + else + MF->print(errs(), Indexes); + } + errs() << "*** Bad machine code: " << msg << " ***\n" + << "- function: " << MF->getName() << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { + assert(MBB); + report(msg, MBB->getParent()); + errs() << "- basic block: BB#" << MBB->getNumber() + << ' ' << MBB->getName() + << " (" << (const void*)MBB << ')'; + if (Indexes) + errs() << " [" << Indexes->getMBBStartIdx(MBB) + << ';' << Indexes->getMBBEndIdx(MBB) << ')'; + errs() << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineInstr *MI) { + assert(MI); + report(msg, MI->getParent()); + errs() << "- instruction: "; + if (Indexes && Indexes->hasIndex(MI)) + errs() << Indexes->getInstructionIndex(MI) << '\t'; + MI->print(errs(), /*SkipOpers=*/true); + errs() << '\n'; +} + +void MachineVerifier::report(const char *msg, + const MachineOperand *MO, unsigned MONum) { + assert(MO); + report(msg, MO->getParent()); + errs() << "- operand " << MONum << ": "; + MO->print(errs(), TRI); + errs() << "\n"; +} + +void MachineVerifier::report_context(const LiveInterval &LI) const { + errs() << "- interval: " << LI << '\n'; +} + +void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) const { + errs() << "- register: " << PrintReg(Reg, TRI) << '\n'; + if (LaneMask != 0) + errs() << "- lanemask: " << PrintLaneMask(LaneMask) << '\n'; + errs() << "- liverange: " << LR << '\n'; +} + +void MachineVerifier::report_context(const LiveRange::Segment &S) const { + errs() << "- segment: " << S << '\n'; +} + +void MachineVerifier::report_context(const VNInfo &VNI) const { + errs() << "- ValNo: " << VNI.id << " (def " << VNI.def << ")\n"; +} + +void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { + BBInfo &MInfo = MBBInfoMap[MBB]; + if (!MInfo.reachable) { + MInfo.reachable = true; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) + markReachable(*SuI); + } +} + +void MachineVerifier::visitMachineFunctionBefore() { + lastIndex = SlotIndex(); + regsReserved = MRI->getReservedRegs(); + + // A sub-register of a reserved register is also reserved + for (int Reg = regsReserved.find_first(); Reg>=0; + Reg = regsReserved.find_next(Reg)) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + // FIXME: This should probably be: + // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register"); + regsReserved.set(*SubRegs); + } + } + + markReachable(&MF->front()); + + // Build a set of the basic blocks in the function. 
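+  // The set form lets visitMachineBasicBlockBefore() detect successors and
+  // predecessors that are not part of this function, and the size comparison
+  // against pred_size()/succ_size() below catches duplicate list entries.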
+  FunctionBlocks.clear();
+  for (const auto &MBB : *MF) {
+    FunctionBlocks.insert(&MBB);
+    BBInfo &MInfo = MBBInfoMap[&MBB];
+
+    MInfo.Preds.insert(MBB.pred_begin(), MBB.pred_end());
+    if (MInfo.Preds.size() != MBB.pred_size())
+      report("MBB has duplicate entries in its predecessor list.", &MBB);
+
+    MInfo.Succs.insert(MBB.succ_begin(), MBB.succ_end());
+    if (MInfo.Succs.size() != MBB.succ_size())
+      report("MBB has duplicate entries in its successor list.", &MBB);
+  }
+
+  // Check that the register use lists are sane.
+  MRI->verifyUseLists();
+
+  verifyStackFrame();
+}
+
+// Does iterator point to a and b as the first two elements?
+static bool matchPair(MachineBasicBlock::const_succ_iterator i,
+    const MachineBasicBlock *a, const MachineBasicBlock *b) {
+  if (*i == a)
+    return *++i == b;
+  if (*i == b)
+    return *++i == a;
+  return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+  FirstTerminator = nullptr;
+
+  if (MRI->isSSA()) {
+    // If this block has allocatable physical registers live-in, check that
+    // it is an entry block or landing pad.
+    for (const auto &LI : MBB->liveins()) {
+      if (isAllocatable(LI.PhysReg) && !MBB->isEHPad() &&
+          MBB != MBB->getParent()->begin()) {
+        report("MBB has allocable live-in, but isn't entry or landing-pad.", MBB);
+      }
+    }
+  }
+
+  // Count the number of landing pad successors.
+  SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+  for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I) {
+    if ((*I)->isEHPad())
+      LandingPadSuccs.insert(*I);
+    if (!FunctionBlocks.count(*I))
+      report("MBB has successor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Preds.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      errs() << "MBB is not in the predecessor list of the successor BB#"
+             << (*I)->getNumber() << ".\n";
+    }
+  }
+
+  // Check the predecessor list.
+  for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(),
+       E = MBB->pred_end(); I != E; ++I) {
+    if (!FunctionBlocks.count(*I))
+      report("MBB has predecessor that isn't part of the function.", MBB);
+    if (!MBBInfoMap[*I].Succs.count(MBB)) {
+      report("Inconsistent CFG", MBB);
+      errs() << "MBB is not in the successor list of the predecessor BB#"
+             << (*I)->getNumber() << ".\n";
+    }
+  }
+
+  const MCAsmInfo *AsmInfo = TM->getMCAsmInfo();
+  const BasicBlock *BB = MBB->getBasicBlock();
+  const Function *Fn = MF->getFunction();
+  if (LandingPadSuccs.size() > 1 &&
+      !(AsmInfo &&
+        AsmInfo->getExceptionHandlingType() == ExceptionHandling::SjLj &&
+        BB && isa<SwitchInst>(BB->getTerminator())) &&
+      !isFuncletEHPersonality(classifyEHPersonality(Fn->getPersonalityFn())))
+    report("MBB has more than one landing pad successor", MBB);
+
+  // Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  if (!TII->AnalyzeBranch(*const_cast<MachineBasicBlock *>(MBB),
+                          TBB, FBB, Cond)) {
+    // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's
+    // check whether its answers match up with reality.
+    if (!TBB && !FBB) {
+      // Block falls through to its successor.
+      MachineFunction::const_iterator MBBI = MBB->getIterator();
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out the bottom of the function.
+      } else if (MBB->succ_size() == LandingPadSuccs.size()) {
+        // It's possible that the block legitimately ends with a noreturn
+        // call or an unreachable, in which case it won't actually fall
+        // out of the block.
+      } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
+        report("MBB exits via unconditional fall-through but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(&*MBBI)) {
+        report("MBB exits via unconditional fall-through but its successor "
+               "differs from its CFG successor!", MBB);
+      }
+      if (!MBB->empty() && MBB->back().isBarrier() &&
+          !TII->isPredicated(&MBB->back())) {
+        report("MBB exits via unconditional fall-through but ends with a "
+               "barrier instruction!", MBB);
+      }
+      if (!Cond.empty()) {
+        report("MBB exits via unconditional fall-through but has a condition!",
+               MBB);
+      }
+    } else if (TBB && !FBB && Cond.empty()) {
+      // Block unconditionally branches somewhere.
+      // If the block has exactly one successor, that happens to be a
+      // landingpad, accept it as valid control flow.
+      if (MBB->succ_size() != 1+LandingPadSuccs.size() &&
+          (MBB->succ_size() != 1 || LandingPadSuccs.size() != 1 ||
+           *MBB->succ_begin() != *LandingPadSuccs.begin())) {
+        report("MBB exits via unconditional branch but doesn't have "
+               "exactly one CFG successor!", MBB);
+      } else if (!MBB->isSuccessor(TBB)) {
+        report("MBB exits via unconditional branch but the CFG "
+               "successor doesn't match the actual successor!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via unconditional branch but doesn't contain "
+               "any instructions!", MBB);
+      } else if (!MBB->back().isBarrier()) {
+        report("MBB exits via unconditional branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().isTerminator()) {
+        report("MBB exits via unconditional branch but the branch isn't a "
+               "terminator instruction!", MBB);
+      }
+    } else if (TBB && !FBB && !Cond.empty()) {
+      // Block conditionally branches somewhere, otherwise falls through.
+      MachineFunction::const_iterator MBBI = MBB->getIterator();
+      ++MBBI;
+      if (MBBI == MF->end()) {
+        report("MBB conditionally falls through out of function!", MBB);
+      } else if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (&*MBBI != TBB)
+          report("MBB exits via conditional branch/fall-through but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/fall-through but the CFG "
+                 "successor doesn't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/fall-through but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, &*MBBI)) {
+        report("MBB exits via conditional branch/fall-through but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/fall-through but doesn't "
+               "contain any instructions!", MBB);
+      } else if (MBB->back().isBarrier()) {
+        report("MBB exits via conditional branch/fall-through but ends with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().isTerminator()) {
+        report("MBB exits via conditional branch/fall-through but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+    } else if (TBB && FBB) {
+      // Block conditionally branches somewhere, otherwise branches
+      // somewhere else.
+      if (MBB->succ_size() == 1) {
+        // A conditional branch with only one successor is weird, but allowed.
+        if (FBB != TBB)
+          report("MBB exits via conditional branch/branch through but only has "
+                 "one CFG successor!", MBB);
+        else if (TBB != *MBB->succ_begin())
+          report("MBB exits via conditional branch/branch through but the CFG "
+                 "successor doesn't match the actual successor!", MBB);
+      } else if (MBB->succ_size() != 2) {
+        report("MBB exits via conditional branch/branch but doesn't have "
+               "exactly two CFG successors!", MBB);
+      } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
+        report("MBB exits via conditional branch/branch but the CFG "
+               "successors don't match the actual successors!", MBB);
+      }
+      if (MBB->empty()) {
+        report("MBB exits via conditional branch/branch but doesn't "
+               "contain any instructions!", MBB);
+      } else if (!MBB->back().isBarrier()) {
+        report("MBB exits via conditional branch/branch but doesn't end with a "
+               "barrier instruction!", MBB);
+      } else if (!MBB->back().isTerminator()) {
+        report("MBB exits via conditional branch/branch but the branch "
+               "isn't a terminator instruction!", MBB);
+      }
+      if (Cond.empty()) {
+        report("MBB exits via conditional branch/branch but there's no "
+               "condition!", MBB);
+      }
+    } else {
+      report("AnalyzeBranch returned invalid data!", MBB);
+    }
+  }
+
+  regsLive.clear();
+  for (const auto &LI : MBB->liveins()) {
+    if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+      report("MBB live-in list contains non-physical register", MBB);
+      continue;
+    }
+    for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
+      regsLive.insert(*SubRegs);
+  }
+  regsLiveInButUnused = regsLive;
+
+  const MachineFrameInfo *MFI = MF->getFrameInfo();
+  assert(MFI && "Function has no frame info");
+  BitVector PR = MFI->getPristineRegs(*MF);
+  for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
+    for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
+         SubRegs.isValid(); ++SubRegs)
+      regsLive.insert(*SubRegs);
+  }
+
+  regsKilled.clear();
+  regsDefined.clear();
+
+  if (Indexes)
+    lastIndex = Indexes->getMBBStartIdx(MBB);
+}
+
+// This function gets called for all bundle headers, including normal
+// stand-alone unbundled instructions.
+void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
+  if (Indexes && Indexes->hasIndex(MI)) {
+    SlotIndex idx = Indexes->getInstructionIndex(MI);
+    if (!(idx > lastIndex)) {
+      report("Instruction index out of order", MI);
+      errs() << "Last instruction was at " << lastIndex << '\n';
+    }
+    lastIndex = idx;
+  }
+
+  // Ensure non-terminators don't follow terminators.
+  // Ignore predicated terminators formed by if conversion.
+  // FIXME: If conversion shouldn't need to violate this rule.
+  if (MI->isTerminator() && !TII->isPredicated(MI)) {
+    if (!FirstTerminator)
+      FirstTerminator = MI;
+  } else if (FirstTerminator) {
+    report("Non-terminator instruction after the first terminator", MI);
+    errs() << "First terminator was:\t" << *FirstTerminator;
+  }
+}
+
+// The operands on an INLINEASM instruction must follow a template.
+// Verify that the flag operands make sense.
+void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
+  // The first two operands on INLINEASM are the asm string and global flags.
+ if (MI->getNumOperands() < 2) { + report("Too few operands on inline asm", MI); + return; + } + if (!MI->getOperand(0).isSymbol()) + report("Asm string must be an external symbol", MI); + if (!MI->getOperand(1).isImm()) + report("Asm flags must be an immediate", MI); + // Allowed flags are Extra_HasSideEffects = 1, Extra_IsAlignStack = 2, + // Extra_AsmDialect = 4, Extra_MayLoad = 8, and Extra_MayStore = 16. + if (!isUInt<5>(MI->getOperand(1).getImm())) + report("Unknown asm flags", &MI->getOperand(1), 1); + + static_assert(InlineAsm::MIOp_FirstOperand == 2, "Asm format changed"); + + unsigned OpNo = InlineAsm::MIOp_FirstOperand; + unsigned NumOps; + for (unsigned e = MI->getNumOperands(); OpNo < e; OpNo += NumOps) { + const MachineOperand &MO = MI->getOperand(OpNo); + // There may be implicit ops after the fixed operands. + if (!MO.isImm()) + break; + NumOps = 1 + InlineAsm::getNumOperandRegisters(MO.getImm()); + } + + if (OpNo > MI->getNumOperands()) + report("Missing operands in last group", MI); + + // An optional MDNode follows the groups. + if (OpNo < MI->getNumOperands() && MI->getOperand(OpNo).isMetadata()) + ++OpNo; + + // All trailing operands must be implicit registers. + for (unsigned e = MI->getNumOperands(); OpNo < e; ++OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + if (!MO.isReg() || !MO.isImplicit()) + report("Expected implicit register after groups", &MO, OpNo); + } +} + +void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { + const MCInstrDesc &MCID = MI->getDesc(); + if (MI->getNumOperands() < MCID.getNumOperands()) { + report("Too few operands", MI); + errs() << MCID.getNumOperands() << " operands expected, but " + << MI->getNumOperands() << " given.\n"; + } + + // Check the tied operands. + if (MI->isInlineAsm()) + verifyInlineAsm(MI); + + // Check the MachineMemOperands for basic consistency. + for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), + E = MI->memoperands_end(); I != E; ++I) { + if ((*I)->isLoad() && !MI->mayLoad()) + report("Missing mayLoad flag", MI); + if ((*I)->isStore() && !MI->mayStore()) + report("Missing mayStore flag", MI); + } + + // Debug values must not have a slot index. + // Other instructions must have one, unless they are inside a bundle. + if (LiveInts) { + bool mapped = !LiveInts->isNotInMIMap(MI); + if (MI->isDebugValue()) { + if (mapped) + report("Debug instruction has a slot index", MI); + } else if (MI->isInsideBundle()) { + if (mapped) + report("Instruction inside bundle has a slot index", MI); + } else { + if (!mapped) + report("Missing slot index", MI); + } + } + + StringRef ErrorInfo; + if (!TII->verifyInstruction(MI, ErrorInfo)) + report(ErrorInfo.data(), MI); +} + +void +MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const MCInstrDesc &MCID = MI->getDesc(); + unsigned NumDefs = MCID.getNumDefs(); + if (MCID.getOpcode() == TargetOpcode::PATCHPOINT) + NumDefs = (MONum == 0 && MO->isReg()) ? 
NumDefs : 0; + + // The first MCID.NumDefs operands must be explicit register defines + if (MONum < NumDefs) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + if (!MO->isReg()) + report("Explicit definition must be a register", MO, MONum); + else if (!MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit definition marked as use", MO, MONum); + else if (MO->isImplicit()) + report("Explicit definition marked as implicit", MO, MONum); + } else if (MONum < MCID.getNumOperands()) { + const MCOperandInfo &MCOI = MCID.OpInfo[MONum]; + // Don't check if it's the last operand in a variadic instruction. See, + // e.g., LDM_RET in the arm back end. + if (MO->isReg() && + !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { + if (MO->isDef() && !MCOI.isOptionalDef()) + report("Explicit operand marked as def", MO, MONum); + if (MO->isImplicit()) + report("Explicit operand marked as implicit", MO, MONum); + } + + int TiedTo = MCID.getOperandConstraint(MONum, MCOI::TIED_TO); + if (TiedTo != -1) { + if (!MO->isReg()) + report("Tied use must be a register", MO, MONum); + else if (!MO->isTied()) + report("Operand should be tied", MO, MONum); + else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum)) + report("Tied def doesn't match MCInstrDesc", MO, MONum); + } else if (MO->isReg() && MO->isTied()) + report("Explicit operand should not be tied", MO, MONum); + } else { + // ARM adds %reg0 operands to indicate predicates. We'll allow that. + if (MO->isReg() && !MO->isImplicit() && !MI->isVariadic() && MO->getReg()) + report("Extra explicit operand on non-variadic instruction", MO, MONum); + } + + switch (MO->getType()) { + case MachineOperand::MO_Register: { + const unsigned Reg = MO->getReg(); + if (!Reg) + return; + if (MRI->tracksLiveness() && !MI->isDebugValue()) + checkLiveness(MO, MONum); + + // Verify the consistency of tied operands. + if (MO->isTied()) { + unsigned OtherIdx = MI->findTiedOperandIdx(MONum); + const MachineOperand &OtherMO = MI->getOperand(OtherIdx); + if (!OtherMO.isReg()) + report("Must be tied to a register", MO, MONum); + if (!OtherMO.isTied()) + report("Missing tie flags on tied operand", MO, MONum); + if (MI->findTiedOperandIdx(OtherIdx) != MONum) + report("Inconsistent tie links", MO, MONum); + if (MONum < MCID.getNumDefs()) { + if (OtherIdx < MCID.getNumOperands()) { + if (-1 == MCID.getOperandConstraint(OtherIdx, MCOI::TIED_TO)) + report("Explicit def tied to explicit use without tie constraint", + MO, MONum); + } else { + if (!OtherMO.isImplicit()) + report("Explicit def should be tied to implicit use", MO, MONum); + } + } + } + + // Verify two-address constraints after leaving SSA form. + unsigned DefIdx; + if (!MRI->isSSA() && MO->isUse() && + MI->isRegTiedToDefOperand(MONum, &DefIdx) && + Reg != MI->getOperand(DefIdx).getReg()) + report("Two-address instruction operands must be identical", MO, MONum); + + // Check register classes. + if (MONum < MCID.getNumOperands() && !MO->isImplicit()) { + unsigned SubIdx = MO->getSubReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (SubIdx) { + report("Illegal subregister index for physical register", MO, MONum); + return; + } + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { + if (!DRC->contains(Reg)) { + report("Illegal physical register for instruction", MO, MONum); + errs() << TRI->getName(Reg) << " is not a " + << TRI->getRegClassName(DRC) << " register.\n"; + } + } + } else { + // Virtual register. 
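The TIED_TO checks above require tie links to be symmetric: if findTiedOperandIdx(i) == j, then findTiedOperandIdx(j) must be i, and a tied def/use pair must agree with the MCInstrDesc constraint. Here is a small sketch of just the symmetry check, with tie links modeled as a plain index vector; the encoding is hypothetical and not the MachineInstr representation.

#include <cstddef>
#include <vector>

// TiedTo[i] is the operand index that operand i is tied to, or -1 if untied.
// The verifier above requires the links to be symmetric: if i is tied to j,
// then j must be tied back to i (the findTiedOperandIdx consistency check).
static bool tieLinksConsistent(const std::vector<int> &TiedTo) {
  for (std::size_t I = 0; I != TiedTo.size(); ++I) {
    int J = TiedTo[I];
    if (J < 0)
      continue;                                   // operand not tied
    if (static_cast<std::size_t>(J) >= TiedTo.size())
      return false;                               // dangling tie link
    if (TiedTo[J] != static_cast<int>(I))
      return false;                               // inconsistent tie links
  }
  return true;
}

int main() {
  std::vector<int> Good = {1, 0, -1};  // operands 0 and 1 tied to each other
  std::vector<int> Bad  = {1, -1, -1}; // 0 claims a tie, 1 does not reciprocate
  return tieLinksConsistent(Good) && !tieLinksConsistent(Bad) ? 0 : 1;
}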
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg); + if (SubIdx) { + const TargetRegisterClass *SRC = + TRI->getSubClassWithSubReg(RC, SubIdx); + if (!SRC) { + report("Invalid subregister index for virtual register", MO, MONum); + errs() << "Register class " << TRI->getRegClassName(RC) + << " does not support subreg index " << SubIdx << "\n"; + return; + } + if (RC != SRC) { + report("Invalid register class for subregister index", MO, MONum); + errs() << "Register class " << TRI->getRegClassName(RC) + << " does not fully support subreg index " << SubIdx << "\n"; + return; + } + } + if (const TargetRegisterClass *DRC = + TII->getRegClass(MCID, MONum, TRI, *MF)) { + if (SubIdx) { + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(RC, *MF); + if (!SuperRC) { + report("No largest legal super class exists.", MO, MONum); + return; + } + DRC = TRI->getMatchingSuperRegClass(SuperRC, DRC, SubIdx); + if (!DRC) { + report("No matching super-reg register class.", MO, MONum); + return; + } + } + if (!RC->hasSuperClassEq(DRC)) { + report("Illegal virtual register for instruction", MO, MONum); + errs() << "Expected a " << TRI->getRegClassName(DRC) + << " register, but got a " << TRI->getRegClassName(RC) + << " register\n"; + } + } + } + } + break; + } + + case MachineOperand::MO_RegisterMask: + regMasks.push_back(MO->getRegMask()); + break; + + case MachineOperand::MO_MachineBasicBlock: + if (MI->isPHI() && !MO->getMBB()->isSuccessor(MI->getParent())) + report("PHI operand is not in the CFG", MO, MONum); + break; + + case MachineOperand::MO_FrameIndex: + if (LiveStks && LiveStks->hasInterval(MO->getIndex()) && + LiveInts && !LiveInts->isNotInMIMap(MI)) { + int FI = MO->getIndex(); + LiveInterval &LI = LiveStks->getInterval(FI); + SlotIndex Idx = LiveInts->getInstructionIndex(MI); + + bool stores = MI->mayStore(); + bool loads = MI->mayLoad(); + // For a memory-to-memory move, we need to check if the frame + // index is used for storing or loading, by inspecting the + // memory operands. + if (stores && loads) { + for (auto *MMO : MI->memoperands()) { + const PseudoSourceValue *PSV = MMO->getPseudoValue(); + if (PSV == nullptr) continue; + const FixedStackPseudoSourceValue *Value = + dyn_cast<FixedStackPseudoSourceValue>(PSV); + if (Value == nullptr) continue; + if (Value->getFrameIndex() != FI) continue; + + if (MMO->isStore()) + loads = false; + else + stores = false; + break; + } + if (loads == stores) + report("Missing fixed stack memoperand.", MI); + } + if (loads && !LI.liveAt(Idx.getRegSlot(true))) { + report("Instruction loads from dead spill slot", MO, MONum); + errs() << "Live stack: " << LI << '\n'; + } + if (stores && !LI.liveAt(Idx.getRegSlot())) { + report("Instruction stores to dead spill slot", MO, MONum); + errs() << "Live stack: " << LI << '\n'; + } + } + break; + + default: + break; + } +} + +void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { + const MachineInstr *MI = MO->getParent(); + const unsigned Reg = MO->getReg(); + + // Both use and def operands can read a register. + if (MO->readsReg()) { + regsLiveInButUnused.erase(Reg); + + if (MO->isKill()) + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill. 
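The spill-slot checks above rely on LiveRange/LiveInterval queries such as liveAt(Idx) over sorted, non-overlapping segments. A sketch of such a query over simplified integer slot positions follows; the real SlotIndex ordering and the distinct register/block slots are not modeled.

#include <algorithm>
#include <vector>

// A half-open [Start, End) segment of a live range, indexed by a plain
// integer slot number instead of a SlotIndex.
struct Segment { unsigned Start, End; };

// Returns true if Pos falls inside one of the (sorted, non-overlapping)
// segments, mirroring the liveAt queries used for the spill-slot checks.
static bool liveAt(const std::vector<Segment> &Segs, unsigned Pos) {
  auto It = std::upper_bound(
      Segs.begin(), Segs.end(), Pos,
      [](unsigned P, const Segment &S) { return P < S.Start; });
  if (It == Segs.begin())
    return false;            // Pos is before the first segment
  --It;
  return Pos < It->End;      // inside the preceding segment?
}

int main() {
  std::vector<Segment> Segs = {{4, 10}, {16, 20}};
  return (liveAt(Segs, 8) && !liveAt(Segs, 12)) ? 0 : 1;
}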
+ if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) && + MO->isKill()) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } + + // Check LiveInts liveness and kill. + if (LiveInts && !LiveInts->isNotInMIMap(MI)) { + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI); + // Check the cached regunit intervals. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) { + LiveQueryResult LRQ = LR->Query(UseIdx); + if (!LRQ.valueIn()) { + report("No live segment at use", MO, MONum); + errs() << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) + << ' ' << *LR << '\n'; + } + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + errs() << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; + } + } + } + } + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (LiveInts->hasInterval(Reg)) { + // This is a virtual register interval. + const LiveInterval &LI = LiveInts->getInterval(Reg); + LiveQueryResult LRQ = LI.Query(UseIdx); + if (!LRQ.valueIn()) { + report("No live segment at use", MO, MONum); + errs() << UseIdx << " is not live in " << LI << '\n'; + } + // Check for extra kill flags. + // Note that we allow missing kill flags for now. + if (MO->isKill() && !LRQ.isKill()) { + report("Live range continues after kill flag", MO, MONum); + errs() << "Live range: " << LI << '\n'; + } + } else { + report("Virtual register has no live interval", MO, MONum); + } + } + } + + // Use of a dead register. + if (!regsLive.count(Reg)) { + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // Reserved registers may be used even when 'dead'. + bool Bad = !isReserved(Reg); + // We are fine if just any subregister has a defined value. + if (Bad) { + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); + ++SubRegs) { + if (regsLive.count(*SubRegs)) { + Bad = false; + break; + } + } + } + // If there is an additional implicit-use of a super register we stop + // here. By definition we are fine if the super register is not + // (completely) dead, if the complete super register is dead we will + // get a report for its operand. + if (Bad) { + for (const MachineOperand &MOP : MI->uses()) { + if (!MOP.isReg()) + continue; + if (!MOP.isImplicit()) + continue; + for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid(); + ++SubRegs) { + if (*SubRegs == Reg) { + Bad = false; + break; + } + } + } + } + if (Bad) + report("Using an undefined physical register", MO, MONum); + } else if (MRI->def_empty(Reg)) { + report("Reading virtual register without a def", MO, MONum); + } else { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + // We don't know which virtual registers are live in, so only complain + // if vreg was killed in this MBB. Otherwise keep track of vregs that + // must be live in. PHI instructions are handled separately. + if (MInfo.regsKilled.count(Reg)) + report("Using a killed virtual register", MO, MONum); + else if (!MI->isPHI()) + MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); + } + } + } + + if (MO->isDef()) { + // Register defined. + // TODO: verify that earlyclobber ops are not used. + if (MO->isDead()) + addRegWithSubRegs(regsDead, Reg); + else + addRegWithSubRegs(regsDefined, Reg); + + // Verify SSA form. 
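The "undefined physical register" logic above first looks up the register in regsLive and then falls back to checking whether any sub-register still carries a defined value (the reserved-register and implicit-use cases are further relaxations). A reduced sketch of that fallback using plain sets and a hypothetical sub-register map:

#include <map>
#include <set>
#include <vector>

// A use of Reg is acceptable if Reg itself or any of its sub-registers is
// currently live. SubRegs is a hypothetical register-to-sub-registers map;
// the reserved-register and implicit-use escape hatches are omitted.
static bool physRegUseDefined(
    unsigned Reg, const std::set<unsigned> &RegsLive,
    const std::map<unsigned, std::vector<unsigned>> &SubRegs) {
  if (RegsLive.count(Reg))
    return true;
  auto It = SubRegs.find(Reg);
  if (It == SubRegs.end())
    return false;
  for (unsigned Sub : It->second)
    if (RegsLive.count(Sub))
      return true;    // some sub-register still carries a defined value
  return false;
}

int main() {
  std::map<unsigned, std::vector<unsigned>> SubRegs = {{1, {10, 11}}};
  std::set<unsigned> RegsLive = {11};
  return physRegUseDefined(1, RegsLive, SubRegs) ? 0 : 1;
}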
+ if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) && + std::next(MRI->def_begin(Reg)) != MRI->def_end()) + report("Multiple virtual register defs in SSA form", MO, MONum); + + // Check LiveInts for a live segment, but only for virtual registers. + if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && + !LiveInts->isNotInMIMap(MI)) { + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); + DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber()); + if (LiveInts->hasInterval(Reg)) { + const LiveInterval &LI = LiveInts->getInterval(Reg); + if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { + assert(VNI && "NULL valno is not allowed"); + if (VNI->def != DefIdx) { + report("Inconsistent valno->def", MO, MONum); + errs() << "Valno " << VNI->id << " is not defined at " + << DefIdx << " in " << LI << '\n'; + } + } else { + report("No live segment at def", MO, MONum); + errs() << DefIdx << " is not live in " << LI << '\n'; + } + // Check that, if the dead def flag is present, LiveInts agree. + if (MO->isDead()) { + LiveQueryResult LRQ = LI.Query(DefIdx); + if (!LRQ.isDeadDef()) { + report("Live range continues after dead def flag", MO, MONum); + errs() << "Live range: " << LI << '\n'; + } + } + } else { + report("Virtual register has no Live interval", MO, MONum); + } + } + } +} + +void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) { +} + +// This function gets called after visiting all instructions in a bundle. The +// argument points to the bundle header. +// Normal stand-alone instructions are also considered 'bundles', and this +// function is called for all of them. +void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) { + BBInfo &MInfo = MBBInfoMap[MI->getParent()]; + set_union(MInfo.regsKilled, regsKilled); + set_subtract(regsLive, regsKilled); regsKilled.clear(); + // Kill any masked registers. + while (!regMasks.empty()) { + const uint32_t *Mask = regMasks.pop_back_val(); + for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I) + if (TargetRegisterInfo::isPhysicalRegister(*I) && + MachineOperand::clobbersPhysReg(Mask, *I)) + regsDead.push_back(*I); + } + set_subtract(regsLive, regsDead); regsDead.clear(); + set_union(regsLive, regsDefined); regsDefined.clear(); +} + +void +MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) { + MBBInfoMap[MBB].regsLiveOut = regsLive; + regsLive.clear(); + + if (Indexes) { + SlotIndex stop = Indexes->getMBBEndIdx(MBB); + if (!(stop > lastIndex)) { + report("Block ends before last instruction index", MBB); + errs() << "Block ends at " << stop + << " last instruction was at " << lastIndex << '\n'; + } + lastIndex = stop; + } +} + +// Calculate the largest possible vregsPassed sets. These are the registers that +// can pass through an MBB live, but may not be live every time. It is assumed +// that all vregsPassed sets are empty before the call. +void MachineVerifier::calcRegsPassed() { + // First push live-out regs to successors' vregsPassed. Remember the MBBs that + // have any vregsPassed. + SmallPtrSet<const MachineBasicBlock*, 8> todo; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + if (!MInfo.reachable) + continue; + for (MachineBasicBlock::const_succ_iterator SuI = MBB.succ_begin(), + SuE = MBB.succ_end(); SuI != SuE; ++SuI) { + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.regsLiveOut)) + todo.insert(*SuI); + } + } + + // Iteratively push vregsPassed to successors. 
This will converge to the same + // final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_succ_iterator SuI = MBB->succ_begin(), + SuE = MBB->succ_end(); SuI != SuE; ++SuI) { + if (*SuI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*SuI]; + if (SInfo.addPassed(MInfo.vregsPassed)) + todo.insert(*SuI); + } + } +} + +// Calculate the set of virtual registers that must be passed through each basic +// block in order to satisfy the requirements of successor blocks. This is very +// similar to calcRegsPassed, only backwards. +void MachineVerifier::calcRegsRequired() { + // First push live-in regs to predecessors' vregsRequired. + SmallPtrSet<const MachineBasicBlock*, 8> todo; + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), + PrE = MBB.pred_end(); PrI != PrE; ++PrI) { + BBInfo &PInfo = MBBInfoMap[*PrI]; + if (PInfo.addRequired(MInfo.vregsLiveIn)) + todo.insert(*PrI); + } + } + + // Iteratively push vregsRequired to predecessors. This will converge to the + // same final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (*PrI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*PrI]; + if (SInfo.addRequired(MInfo.vregsRequired)) + todo.insert(*PrI); + } + } +} + +// Check PHI instructions at the beginning of MBB. It is assumed that +// calcRegsPassed has been run so BBInfo::isLiveOut is valid. +void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { + SmallPtrSet<const MachineBasicBlock*, 8> seen; + for (const auto &BBI : *MBB) { + if (!BBI.isPHI()) + break; + seen.clear(); + + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI.getOperand(i).getReg(); + const MachineBasicBlock *Pre = BBI.getOperand(i + 1).getMBB(); + if (!Pre->isSuccessor(MBB)) + continue; + seen.insert(Pre); + BBInfo &PrInfo = MBBInfoMap[Pre]; + if (PrInfo.reachable && !PrInfo.isLiveOut(Reg)) + report("PHI operand is not live-out from predecessor", + &BBI.getOperand(i), i); + } + + // Did we see all predecessors? + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (!seen.count(*PrI)) { + report("Missing PHI operand", &BBI); + errs() << "BB#" << (*PrI)->getNumber() + << " is a predecessor according to the CFG.\n"; + } + } + } +} + +void MachineVerifier::visitMachineFunctionAfter() { + calcRegsPassed(); + + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + + // Skip unreachable MBBs. + if (!MInfo.reachable) + continue; + + checkPHIOps(&MBB); + } + + // Now check liveness info if available + calcRegsRequired(); + + // Check for killed virtual registers that should be live out. 
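calcRegsPassed() and calcRegsRequired() above are both worklist fixed-point computations: a block's set absorbs its neighbours' sets, and a block is re-queued whenever its set grows. Below is a compact standalone sketch of the forward (vregsPassed-style) direction over an adjacency list; the set contents and block numbering are hypothetical.

#include <cstddef>
#include <set>
#include <vector>

// Forward dataflow in the style of calcRegsPassed: each block's "passed" set
// absorbs the live-out and passed sets of its predecessors until a fixed
// point is reached.
static std::vector<std::set<unsigned>>
propagatePassed(const std::vector<std::vector<std::size_t>> &Succs,
                const std::vector<std::set<unsigned>> &LiveOut) {
  std::vector<std::set<unsigned>> Passed(Succs.size());
  std::vector<std::size_t> Worklist;
  for (std::size_t B = 0; B != Succs.size(); ++B)
    Worklist.push_back(B);
  while (!Worklist.empty()) {
    std::size_t B = Worklist.back();
    Worklist.pop_back();
    for (std::size_t S : Succs[B]) {
      std::set<unsigned> &Dst = Passed[S];
      std::size_t Before = Dst.size();
      Dst.insert(LiveOut[B].begin(), LiveOut[B].end());
      Dst.insert(Passed[B].begin(), Passed[B].end());
      if (Dst.size() != Before)
        Worklist.push_back(S);   // set grew, successors must be revisited
    }
  }
  return Passed;
}

int main() {
  // 0 -> 1 -> 2; register 7 is live out of block 0.
  std::vector<std::vector<std::size_t>> Succs = {{1}, {2}, {}};
  std::vector<std::set<unsigned>> LiveOut = {{7}, {}, {}};
  return propagatePassed(Succs, LiveOut)[2].count(7) ? 0 : 1;
}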
+ for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + if (MInfo.regsKilled.count(*I)) { + report("Virtual register killed in block, but needed live out.", &MBB); + errs() << "Virtual register " << PrintReg(*I) + << " is used after the block.\n"; + } + } + + if (!MF->empty()) { + BBInfo &MInfo = MBBInfoMap[&MF->front()]; + for (RegSet::iterator + I = MInfo.vregsRequired.begin(), E = MInfo.vregsRequired.end(); I != E; + ++I) + report("Virtual register def doesn't dominate all uses.", + MRI->getVRegDef(*I)); + } + + if (LiveVars) + verifyLiveVariables(); + if (LiveInts) + verifyLiveIntervals(); +} + +void MachineVerifier::verifyLiveVariables() { + assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + for (const auto &MBB : *MF) { + BBInfo &MInfo = MBBInfoMap[&MBB]; + + // Our vregsRequired should be identical to LiveVariables' AliveBlocks + if (MInfo.vregsRequired.count(Reg)) { + if (!VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", &MBB); + errs() << "Virtual register " << PrintReg(Reg) + << " must be live through the block.\n"; + } + } else { + if (VI.AliveBlocks.test(MBB.getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", &MBB); + errs() << "Virtual register " << PrintReg(Reg) + << " is not needed live through the block.\n"; + } + } + } + } +} + +void MachineVerifier::verifyLiveIntervals() { + assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts"); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + + // Spilling and splitting may leave unused registers around. Skip them. + if (MRI->reg_nodbg_empty(Reg)) + continue; + + if (!LiveInts->hasInterval(Reg)) { + report("Missing live interval for virtual register", MF); + errs() << PrintReg(Reg, TRI) << " still has defs or uses\n"; + continue; + } + + const LiveInterval &LI = LiveInts->getInterval(Reg); + assert(Reg == LI.reg && "Invalid reg to interval mapping"); + verifyLiveInterval(LI); + } + + // Verify all the cached regunit intervals. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + if (const LiveRange *LR = LiveInts->getCachedRegUnit(i)) + verifyLiveRange(*LR, i); +} + +void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, + const VNInfo *VNI, unsigned Reg, + LaneBitmask LaneMask) { + if (VNI->isUnused()) + return; + + const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); + + if (!DefVNI) { + report("Value not live at VNInfo def and not marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + return; + } + + if (DefVNI != VNI) { + report("Live segment at def has different VNInfo", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + return; + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); + if (!MBB) { + report("Invalid VNInfo definition index", MF); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + return; + } + + if (VNI->isPHIDef()) { + if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { + report("PHIDef VNInfo is not defined at MBB start", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + } + return; + } + + // Non-PHI def. 
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); + if (!MI) { + report("No instruction at VNInfo def index", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + return; + } + + if (Reg != 0) { + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) + continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MOI->getReg() != Reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), Reg)) + continue; + } + if (LaneMask != 0 && + (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0) + continue; + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; + } + + if (!hasDef) { + report("Defining instruction does not modify register", MI); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + } + + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. + if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", MBB); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + } + } +} + +void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, + const LiveRange::const_iterator I, + unsigned Reg, LaneBitmask LaneMask) +{ + const LiveRange::Segment &S = *I; + const VNInfo *VNI = S.valno; + assert(VNI && "Live segment has no valno"); + + if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { + report("Foreign valno in live segment", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + report_context(*VNI); + } + + if (VNI->isUnused()) { + report("Live segment valno is marked unused", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + } + + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); + if (!MBB) { + report("Bad start of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + return; + } + SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); + if (S.start != MBBStartIdx && S.start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB); + report_context(LR, Reg, LaneMask); + report_context(S); + } + + const MachineBasicBlock *EndMBB = + LiveInts->getMBBFromIndex(S.end.getPrevSlot()); + if (!EndMBB) { + report("Bad end of live segment, no basic block", MF); + report_context(LR, Reg, LaneMask); + report_context(S); + return; + } + + // No more checks for live-out segments. + if (S.end == LiveInts->getMBBEndIdx(EndMBB)) + return; + + // RegUnit intervals are allowed dead phis. + if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + S.start == VNI->def && S.end == VNI->def.getDeadSlot()) + return; + + // The live segment is ending inside EndMBB + const MachineInstr *MI = + LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); + if (!MI) { + report("Live segment doesn't end at a valid instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); + return; + } + + // The block slot must refer to a basic block boundary. 
+ if (S.end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); + } + + if (S.end.isDead()) { + // Segment ends on the dead slot. + // That means there must be a dead def. + if (!SlotIndex::isSameInstr(S.start, S.end)) { + report("Live segment ending at dead slot spans instructions", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); + } + } + + // A live segment can only end at an early-clobber slot if it is being + // redefined by an early-clobber def. + if (S.end.isEarlyClobber()) { + if (I+1 == LR.end() || (I+1)->start != S.end) { + report("Live segment ending at early clobber slot must be " + "redefined by an EC def in the same instruction", EndMBB); + report_context(LR, Reg, LaneMask); + report_context(S); + } + } + + // The following checks only apply to virtual registers. Physreg liveness + // is too weird to check. + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // A live segment can end with either a redefinition, a kill flag on a + // use, or a dead flag on a def. + bool hasRead = false; + bool hasSubRegDef = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || MOI->getReg() != Reg) + continue; + if (LaneMask != 0 && + (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0) + continue; + if (MOI->isDef() && MOI->getSubReg() != 0) + hasSubRegDef = true; + if (MOI->readsReg()) + hasRead = true; + } + if (!S.end.isDead()) { + if (!hasRead) { + // When tracking subregister liveness, the main range must start new + // values on partial register writes, even if there is no read. + if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 || + !hasSubRegDef) { + report("Instruction ending live segment doesn't read the register", + MI); + report_context(LR, Reg, LaneMask); + report_context(S); + } + } + } + } + + // Now check all the basic blocks in this live segment. + MachineFunction::const_iterator MFI = MBB->getIterator(); + // Is this live segment the beginning of a non-PHIDef VN? + if (S.start == VNI->def && !VNI->isPHIDef()) { + // Not live-in to any blocks. + if (MBB == EndMBB) + return; + // Skip this block. + ++MFI; + } + for (;;) { + assert(LiveInts->isLiveInToMBB(LR, &*MFI)); + // We don't know how to track physregs into a landing pad. + if (!TargetRegisterInfo::isVirtualRegister(Reg) && + MFI->isEHPad()) { + if (&*MFI == EndMBB) + break; + ++MFI; + continue; + } + + // Is VNI a PHI-def in the current block? + bool IsPHI = VNI->isPHIDef() && + VNI->def == LiveInts->getMBBStartIdx(&*MFI); + + // Check that VNI is live-out of all predecessors. + for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), + PE = MFI->pred_end(); PI != PE; ++PI) { + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); + + // All predecessors must have a live-out value. + if (!PVNI) { + report("Register not marked live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); + report_context(*VNI); + errs() << " live into BB#" << MFI->getNumber() + << '@' << LiveInts->getMBBStartIdx(&*MFI) << ", not live before " + << PEnd << '\n'; + continue; + } + + // Only PHI-defs can take different predecessor values. 
+ if (!IsPHI && PVNI != VNI) { + report("Different value live out of predecessor", *PI); + report_context(LR, Reg, LaneMask); + errs() << "Valno #" << PVNI->id << " live out of BB#" + << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id + << " live into BB#" << MFI->getNumber() << '@' + << LiveInts->getMBBStartIdx(&*MFI) << '\n'; + } + } + if (&*MFI == EndMBB) + break; + ++MFI; + } +} + +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg, + LaneBitmask LaneMask) { + for (const VNInfo *VNI : LR.valnos) + verifyLiveRangeValue(LR, VNI, Reg, LaneMask); + + for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) + verifyLiveRangeSegment(LR, I, Reg, LaneMask); +} + +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + unsigned Reg = LI.reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + verifyLiveRange(LI, Reg); + + LaneBitmask Mask = 0; + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((Mask & SR.LaneMask) != 0) { + report("Lane masks of sub ranges overlap in live interval", MF); + report_context(LI); + } + if ((SR.LaneMask & ~MaxMask) != 0) { + report("Subrange lanemask is invalid", MF); + report_context(LI); + } + if (SR.empty()) { + report("Subrange must not be empty", MF); + report_context(SR, LI.reg, SR.LaneMask); + } + Mask |= SR.LaneMask; + verifyLiveRange(SR, LI.reg, SR.LaneMask); + if (!LI.covers(SR)) { + report("A Subrange is not covered by the main range", MF); + report_context(LI); + } + } + + // Check the LI only has one connected component. + ConnectedVNInfoEqClasses ConEQ(*LiveInts); + unsigned NumComp = ConEQ.Classify(&LI); + if (NumComp > 1) { + report("Multiple connected components in live interval", MF); + report_context(LI); + for (unsigned comp = 0; comp != NumComp; ++comp) { + errs() << comp << ": valnos"; + for (LiveInterval::const_vni_iterator I = LI.vni_begin(), + E = LI.vni_end(); I!=E; ++I) + if (comp == ConEQ.getEqClass(*I)) + errs() << ' ' << (*I)->id; + errs() << '\n'; + } + } +} + +namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single + // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the + // value is zero. + // We use a bool plus an integer to capture the stack state. + struct StackStateOfBB { + StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), + ExitIsSetup(false) { } + StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : + EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), + ExitIsSetup(ExitSetup) { } + // Can be negative, which means we are setting up a frame. + int EntryValue; + int ExitValue; + bool EntryIsSetup; + bool ExitIsSetup; + }; +} + +/// Make sure on every path through the CFG, a FrameSetup <n> is always followed +/// by a FrameDestroy <n>, stack adjustments are identical on all +/// CFG edges to a merge point, and frame is destroyed at end of a return block. +void MachineVerifier::verifyStackFrame() { + unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + + SmallVector<StackStateOfBB, 8> SPState; + SPState.resize(MF->getNumBlockIDs()); + SmallPtrSet<const MachineBasicBlock*, 8> Reachable; + + // Visit the MBBs in DFS order. 
+ for (df_ext_iterator<const MachineFunction*, + SmallPtrSet<const MachineBasicBlock*, 8> > + DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); + DFI != DFE; ++DFI) { + const MachineBasicBlock *MBB = *DFI; + + StackStateOfBB BBState; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue; + BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup; + BBState.ExitValue = BBState.EntryValue; + BBState.ExitIsSetup = BBState.EntryIsSetup; + } + + // Update stack state by checking contents of MBB. + for (const auto &I : *MBB) { + if (I.getOpcode() == FrameSetupOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I.getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (BBState.ExitIsSetup) + report("FrameSetup is after another FrameSetup", &I); + BBState.ExitValue -= Size; + BBState.ExitIsSetup = true; + } + + if (I.getOpcode() == FrameDestroyOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I.getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (!BBState.ExitIsSetup) + report("FrameDestroy is not after a FrameSetup", &I); + int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : + BBState.ExitValue; + if (BBState.ExitIsSetup && AbsSPAdj != Size) { + report("FrameDestroy <n> is after FrameSetup <m>", &I); + errs() << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; + } + BBState.ExitValue += Size; + BBState.ExitIsSetup = false; + } + } + SPState[MBB->getNumber()] = BBState; + + // Make sure the exit state of any predecessor is consistent with the entry + // state. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || + SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + report("The exit stack state of a predecessor is inconsistent.", MBB); + errs() << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + << SPState[(*I)->getNumber()].ExitValue << ", " + << SPState[(*I)->getNumber()].ExitIsSetup + << "), while BB#" << MBB->getNumber() << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + } + } + + // Make sure the entry state of any successor is consistent with the exit + // state. + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || + SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + report("The entry stack state of a successor is inconsistent.", MBB); + errs() << "Successor BB#" << (*I)->getNumber() << " has entry state (" + << SPState[(*I)->getNumber()].EntryValue << ", " + << SPState[(*I)->getNumber()].EntryIsSetup + << "), while BB#" << MBB->getNumber() << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + } + } + + // Make sure a basic block with return ends with zero stack adjustment. 
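verifyStackFrame() above tracks the net call-frame adjustment along a DFS, inheriting the entry state from the DFS predecessor and then requiring matching exit/entry states across CFG edges and a balanced stack in return blocks. The following is a reduced sketch of the same idea; the DFS-path bookkeeping is simplified to a caller-supplied visit order and per-block net adjustments, so it is an illustration rather than the pass's exact algorithm.

#include <cstddef>
#include <vector>

// Each block has a net call-frame adjustment: FrameSetup subtracts its size,
// FrameDestroy adds it back.
struct BlockModel {
  int NetAdjustment;                   // sum of setup/destroy deltas in block
  std::vector<std::size_t> Succs;      // successor block indices
};

static bool framesConsistent(const std::vector<BlockModel> &Blocks,
                             const std::vector<std::size_t> &VisitOrder) {
  std::vector<int> Entry(Blocks.size(), 0), Exit(Blocks.size(), 0);
  int CurExit = 0;
  for (std::size_t B : VisitOrder) {   // simplified DFS-path inheritance
    Entry[B] = CurExit;
    Exit[B] = Entry[B] + Blocks[B].NetAdjustment;
    CurExit = Exit[B];
  }
  // Stack state must agree across every CFG edge.
  for (std::size_t B = 0; B != Blocks.size(); ++B)
    for (std::size_t S : Blocks[B].Succs)
      if (Exit[B] != Entry[S])
        return false;
  // A return block (no successors here) must leave the stack fully adjusted.
  for (std::size_t B = 0; B != Blocks.size(); ++B)
    if (Blocks[B].Succs.empty() && Exit[B] != 0)
      return false;
  return true;
}

int main() {
  // Block 0 sets up a 16-byte frame, block 1 tears it down, block 2 returns.
  std::vector<BlockModel> Blocks = {{-16, {1}}, {+16, {2}}, {0, {}}};
  return framesConsistent(Blocks, {0, 1, 2}) ? 0 : 1;
}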
+ if (!MBB->empty() && MBB->back().isReturn()) { + if (BBState.ExitIsSetup) + report("A return block ends with a FrameSetup.", MBB); + if (BBState.ExitValue) + report("A return block ends with a nonzero stack adjustment.", MBB); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/OcamlGC.cpp b/contrib/llvm/lib/CodeGen/OcamlGC.cpp new file mode 100644 index 0000000..17654a6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OcamlGC.cpp @@ -0,0 +1,36 @@ +//===-- OcamlGC.cpp - Ocaml frametable GC strategy ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements lowering for the llvm.gc* intrinsics compatible with +// Objective Caml 3.10.0, which uses a liveness-accurate static stack map. +// +// The frametable emitter is in OcamlGCPrinter.cpp. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" + +using namespace llvm; + +namespace { +class OcamlGC : public GCStrategy { +public: + OcamlGC(); +}; +} + +static GCRegistry::Add<OcamlGC> X("ocaml", "ocaml 3.10-compatible GC"); + +void llvm::linkOcamlGC() {} + +OcamlGC::OcamlGC() { + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; +} diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp new file mode 100644 index 0000000..a1042e7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp @@ -0,0 +1,196 @@ +//===-- OptimizePHIs.cpp - Optimize machine instruction PHIs --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass optimizes machine instruction PHIs to take advantage of +// opportunities created during DAG legalization. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "phi-opt" + +STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); +STATISTIC(NumDeadPHICycles, "Number of dead PHI cycles"); + +namespace { + class OptimizePHIs : public MachineFunctionPass { + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + + public: + static char ID; // Pass identification + OptimizePHIs() : MachineFunctionPass(ID) { + initializeOptimizePHIsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + typedef SmallPtrSet<MachineInstr*, 16> InstrSet; + typedef SmallPtrSetIterator<MachineInstr*> InstrSetIterator; + + bool IsSingleValuePHICycle(MachineInstr *MI, unsigned &SingleValReg, + InstrSet &PHIsInCycle); + bool IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle); + bool OptimizeBB(MachineBasicBlock &MBB); + }; +} + +char OptimizePHIs::ID = 0; +char &llvm::OptimizePHIsID = OptimizePHIs::ID; +INITIALIZE_PASS(OptimizePHIs, "opt-phis", + "Optimize machine instruction PHIs", false, false) + +bool OptimizePHIs::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + + MRI = &Fn.getRegInfo(); + TII = Fn.getSubtarget().getInstrInfo(); + + // Find dead PHI cycles and PHI cycles that can be replaced by a single + // value. InstCombine does these optimizations, but DAG legalization may + // introduce new opportunities, e.g., when i64 values are split up for + // 32-bit targets. + bool Changed = false; + for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) + Changed |= OptimizeBB(*I); + + return Changed; +} + +/// IsSingleValuePHICycle - Check if MI is a PHI where all the source operands +/// are copies of SingleValReg, possibly via copies through other PHIs. If +/// SingleValReg is zero on entry, it is set to the register with the single +/// non-copy value. PHIsInCycle is a set used to keep track of the PHIs that +/// have been scanned. +bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, + unsigned &SingleValReg, + InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI).second) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + // Scan the PHI operands. + for (unsigned i = 1; i != MI->getNumOperands(); i += 2) { + unsigned SrcReg = MI->getOperand(i).getReg(); + if (SrcReg == DstReg) + continue; + MachineInstr *SrcMI = MRI->getVRegDef(SrcReg); + + // Skip over register-to-register moves. 
+ if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); + if (!SrcMI) + return false; + + if (SrcMI->isPHI()) { + if (!IsSingleValuePHICycle(SrcMI, SingleValReg, PHIsInCycle)) + return false; + } else { + // Fail if there is more than one non-phi/non-move register. + if (SingleValReg != 0) + return false; + SingleValReg = SrcReg; + } + } + return true; +} + +/// IsDeadPHICycle - Check if the register defined by a PHI is only used by +/// other PHIs in a cycle. +bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) { + assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction"); + unsigned DstReg = MI->getOperand(0).getReg(); + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "PHI destination is not a virtual register"); + + // See if we already saw this register. + if (!PHIsInCycle.insert(MI).second) + return true; + + // Don't scan crazily complex things. + if (PHIsInCycle.size() == 16) + return false; + + for (MachineInstr &UseMI : MRI->use_instructions(DstReg)) { + if (!UseMI.isPHI() || !IsDeadPHICycle(&UseMI, PHIsInCycle)) + return false; + } + + return true; +} + +/// OptimizeBB - Remove dead PHI cycles and PHI cycles that can be replaced by +/// a single value. +bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) { + bool Changed = false; + for (MachineBasicBlock::iterator + MII = MBB.begin(), E = MBB.end(); MII != E; ) { + MachineInstr *MI = &*MII++; + if (!MI->isPHI()) + break; + + // Check for single-value PHI cycles. + unsigned SingleValReg = 0; + InstrSet PHIsInCycle; + if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) && + SingleValReg != 0) { + unsigned OldReg = MI->getOperand(0).getReg(); + if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg))) + continue; + + MRI->replaceRegWith(OldReg, SingleValReg); + MI->eraseFromParent(); + ++NumPHICycles; + Changed = true; + continue; + } + + // Check for dead PHI cycles. + PHIsInCycle.clear(); + if (IsDeadPHICycle(MI, PHIsInCycle)) { + for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end(); + PI != PE; ++PI) { + MachineInstr *PhiMI = *PI; + if (&*MII == PhiMI) + ++MII; + PhiMI->eraseFromParent(); + } + ++NumDeadPHICycles; + Changed = true; + } + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp new file mode 100644 index 0000000..2c93792 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -0,0 +1,653 @@ +//===-- PhiElimination.cpp - Eliminate PHI nodes by inserting copies ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates machine instruction PHI nodes by inserting copy +// instructions. This destroys SSA information, but is the desired input for +// some register allocators. 
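IsSingleValuePHICycle() in the OptimizePHIs pass just above recurses through PHI sources, skipping copies, and succeeds only if every non-PHI source feeding the cycle is one and the same register. Below is a reduced sketch of that recursion, with the instruction stream modeled as a map from PHI-defined registers to their source registers; copy-skipping and the size cutoff are omitted.

#include <map>
#include <set>
#include <vector>

// Phis maps a PHI-defined register to its source registers; any register not
// in the map is an ordinary (non-PHI) definition. Reg must be a key in Phis.
// Returns true if the cycle reduces to a single value, reported in SingleVal.
static bool isSingleValuePHICycle(
    unsigned Reg, const std::map<unsigned, std::vector<unsigned>> &Phis,
    unsigned &SingleVal, std::set<unsigned> &Visited) {
  if (!Visited.insert(Reg).second)
    return true;                       // already scanned this PHI
  for (unsigned Src : Phis.find(Reg)->second) {
    if (Src == Reg)
      continue;                        // self-reference inside the cycle
    if (Phis.count(Src)) {
      if (!isSingleValuePHICycle(Src, Phis, SingleVal, Visited))
        return false;
    } else if (SingleVal != 0 && SingleVal != Src) {
      return false;                    // more than one non-PHI value feeds in
    } else {
      SingleVal = Src;
    }
  }
  return true;
}

int main() {
  // %1 = PHI(%2, %9), %2 = PHI(%1, %9): the cycle reduces to %9.
  std::map<unsigned, std::vector<unsigned>> Phis = {{1, {2, 9}}, {2, {1, 9}}};
  unsigned SingleVal = 0;
  std::set<unsigned> Visited;
  return isSingleValuePHICycle(1, Phis, SingleVal, Visited) && SingleVal == 9
             ? 0 : 1;
}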
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "PHIEliminationUtils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "phielim" + +static cl::opt<bool> +DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, cl::desc("Disable critical edge splitting " + "during PHI elimination")); + +static cl::opt<bool> +SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, cl::desc("Split all critical edges during " + "PHI elimination")); + +static cl::opt<bool> NoPhiElimLiveOutEarlyExit( + "no-phi-elim-live-out-early-exit", cl::init(false), cl::Hidden, + cl::desc("Do not use an early exit if isLiveOutPastPHIs returns true.")); + +namespace { + class PHIElimination : public MachineFunctionPass { + MachineRegisterInfo *MRI; // Machine register information + LiveVariables *LV; + LiveIntervals *LIS; + + public: + static char ID; // Pass identification, replacement for typeid + PHIElimination() : MachineFunctionPass(ID) { + initializePHIEliminationPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + private: + /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions + /// in predecessor basic blocks. + /// + bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator LastPHIIt); + + /// analyzePHINodes - Gather information about the PHI nodes in + /// here. In particular, we want to map the number of uses of a virtual + /// register which is used in a PHI node. We map that to the BB the + /// vreg is coming from. This is used later to determine when the vreg + /// is killed in the BB. + /// + void analyzePHINodes(const MachineFunction& Fn); + + /// Split critical edges where necessary for good coalescer performance. + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + MachineLoopInfo *MLI); + + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. + bool isLiveIn(unsigned Reg, const MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(unsigned Reg, const MachineBasicBlock *MBB); + + typedef std::pair<unsigned, unsigned> BBVRegPair; + typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse; + + VRegPHIUse VRegPHIUseCount; + + // Defs of PHI sources which are implicit_def. + SmallPtrSet<MachineInstr*, 4> ImpDefs; + + // Map reusable lowered PHI node -> incoming join register. 
+ typedef DenseMap<MachineInstr*, unsigned, + MachineInstrExpressionTrait> LoweredPHIMap; + LoweredPHIMap LoweredPHIs; + }; +} + +STATISTIC(NumLowered, "Number of phis lowered"); +STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); +STATISTIC(NumReused, "Number of reused lowered phis"); + +char PHIElimination::ID = 0; +char& llvm::PHIEliminationID = PHIElimination::ID; + +INITIALIZE_PASS_BEGIN(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", + false, false) +INITIALIZE_PASS_DEPENDENCY(LiveVariables) +INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", + "Eliminate PHI nodes for register allocation", false, false) + +void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { + MRI = &MF.getRegInfo(); + LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); + + bool Changed = false; + + // This pass takes the function out of SSA form. + MRI->leaveSSA(); + + // Split critical edges to help the coalescer. This does not yet support + // updating LiveIntervals, so we disable it. + if (!DisableEdgeSplitting && (LV || LIS)) { + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + for (auto &MBB : MF) + Changed |= SplitPHIEdges(MF, MBB, MLI); + } + + // Populate VRegPHIUseCount + analyzePHINodes(MF); + + // Eliminate PHI instructions by inserting copies into predecessor blocks. + for (auto &MBB : MF) + Changed |= EliminatePHINodes(MF, MBB); + + // Remove dead IMPLICIT_DEF instructions. + for (MachineInstr *DefMI : ImpDefs) { + unsigned DefReg = DefMI->getOperand(0).getReg(); + if (MRI->use_nodbg_empty(DefReg)) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + } + } + + // Clean up the lowered PHI instructions. + for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); + I != E; ++I) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); + MF.DeleteMachineInstr(I->first); + } + + LoweredPHIs.clear(); + ImpDefs.clear(); + VRegPHIUseCount.clear(); + + return Changed; +} + +/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in +/// predecessor basic blocks. +/// +bool PHIElimination::EliminatePHINodes(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (MBB.empty() || !MBB.front().isPHI()) + return false; // Quick exit for basic blocks without PHIs. + + // Get an iterator to the first instruction after the last PHI node (this may + // also be the end of the basic block). + MachineBasicBlock::iterator LastPHIIt = + std::prev(MBB.SkipPHIsAndLabels(MBB.begin())); + + while (MBB.front().isPHI()) + LowerPHINode(MBB, LastPHIIt); + + return true; +} + +/// isImplicitlyDefined - Return true if all defs of VirtReg are implicit-defs. +/// This includes registers with no defs. +static bool isImplicitlyDefined(unsigned VirtReg, + const MachineRegisterInfo *MRI) { + for (MachineInstr &DI : MRI->def_instructions(VirtReg)) + if (!DI.isImplicitDef()) + return false; + return true; +} + +/// isSourceDefinedByImplicitDef - Return true if all sources of the phi node +/// are implicit_def's. 
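// Machine PHIs lay out their operands as: operand 0 = the destination vreg,
// followed by (incoming vreg, predecessor MBB) pairs -- which is why the
// loops below step by two starting at index 1. A small illustrative helper
// built only from the APIs already used in this file (forEachPHIIncoming
// itself is not part of the imported sources):
static void forEachPHIIncoming(
    const MachineInstr &Phi,
    function_ref<void(unsigned Reg, MachineBasicBlock *Pred)> Fn) {
  assert(Phi.isPHI() && "expected a machine PHI");
  for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
    Fn(Phi.getOperand(i).getReg(), Phi.getOperand(i + 1).getMBB());
}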
+static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, + const MachineRegisterInfo *MRI) { + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + if (!isImplicitlyDefined(MPhi->getOperand(i).getReg(), MRI)) + return false; + return true; +} + + +/// LowerPHINode - Lower the PHI node at the top of the specified block, +/// +void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator LastPHIIt) { + ++NumLowered; + + MachineBasicBlock::iterator AfterPHIsIt = std::next(LastPHIIt); + + // Unlink the PHI node from the basic block, but don't delete the PHI yet. + MachineInstr *MPhi = MBB.remove(MBB.begin()); + + unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2; + unsigned DestReg = MPhi->getOperand(0).getReg(); + assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs"); + bool isDead = MPhi->getOperand(0).isDead(); + + // Create a new register for the incoming PHI arguments. + MachineFunction &MF = *MBB.getParent(); + unsigned IncomingReg = 0; + bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? + + // Insert a register to register copy at the top of the current block (but + // after any remaining phi nodes) which copies the new incoming register + // into the phi node destination. + const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); + if (isSourceDefinedByImplicitDef(MPhi, MRI)) + // If all sources of a PHI node are implicit_def, just emit an + // implicit_def instead of a copy. + BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), DestReg); + else { + // Can we reuse an earlier PHI node? This only happens for critical edges, + // typically those created by tail duplication. + unsigned &entry = LoweredPHIs[MPhi]; + if (entry) { + // An identical PHI node was already lowered. Reuse the incoming register. + IncomingReg = entry; + reusedIncoming = true; + ++NumReused; + DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi); + } else { + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); + entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); + } + BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), DestReg) + .addReg(IncomingReg); + } + + // Update live variable information if there is any. + if (LV) { + MachineInstr *PHICopy = std::prev(AfterPHIsIt); + + if (IncomingReg) { + LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg); + + // Increment use count of the newly created virtual register. + LV->setPHIJoin(IncomingReg); + + // When we are reusing the incoming register, it may already have been + // killed in this block. The old kill will also have been inserted at + // AfterPHIsIt, so it appears before the current PHICopy. + if (reusedIncoming) + if (MachineInstr *OldKill = VI.findKill(&MBB)) { + DEBUG(dbgs() << "Remove old kill from " << *OldKill); + LV->removeVirtualRegisterKilled(IncomingReg, OldKill); + DEBUG(MBB.dump()); + } + + // Add information to LiveVariables to know that the incoming value is + // killed. Note that because the value is defined in several places (once + // each for each incoming block), the "def" block and instruction fields + // for the VarInfo is not filled in. + LV->addVirtualRegisterKilled(IncomingReg, PHICopy); + } + + // Since we are going to be deleting the PHI node, if it is the last use of + // any registers, or if the value itself is dead, we need to move this + // information over to the new copy we just inserted. 
+ LV->removeVirtualRegistersKilled(MPhi); + + // If the result is dead, update LV. + if (isDead) { + LV->addVirtualRegisterDead(DestReg, PHICopy); + LV->removeVirtualRegisterDead(DestReg, MPhi); + } + } + + // Update LiveIntervals for the new copy or implicit def. + if (LIS) { + MachineInstr *NewInstr = std::prev(AfterPHIsIt); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); + + SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); + if (IncomingReg) { + // Add the region from the beginning of MBB to the copy instruction to + // IncomingReg's live interval. + LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg); + VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); + if (!IncomingVNI) + IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, + LIS->getVNInfoAllocator()); + IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); + } + + LiveInterval &DestLI = LIS->getInterval(DestReg); + assert(DestLI.begin() != DestLI.end() && + "PHIs should have nonempty LiveIntervals."); + if (DestLI.endIndex().isDead()) { + // A dead PHI's live range begins and ends at the start of the MBB, but + // the lowered copy, which will still be dead, needs to begin and end at + // the copy instruction. + VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); + assert(OrigDestVNI && "PHI destination should be live at block entry."); + DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.createDeadDef(DestCopyIndex.getRegSlot(), + LIS->getVNInfoAllocator()); + DestLI.removeValNo(OrigDestVNI); + } else { + // Otherwise, remove the region from the beginning of MBB to the copy + // instruction from DestReg's live interval. + DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI && "PHI destination should be live at its definition."); + DestVNI->def = DestCopyIndex.getRegSlot(); + } + } + + // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. + for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) + --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), + MPhi->getOperand(i).getReg())]; + + // Now loop over all of the incoming arguments, changing them to copy into the + // IncomingReg register in the corresponding predecessor basic block. + SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto; + for (int i = NumSrcs - 1; i >= 0; --i) { + unsigned SrcReg = MPhi->getOperand(i*2+1).getReg(); + unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg(); + bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() || + isImplicitlyDefined(SrcReg, MRI); + assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && + "Machine PHI Operands must all be virtual registers!"); + + // Get the MachineBasicBlock equivalent of the BasicBlock that is the source + // path the PHI. + MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB(); + + // Check to make sure we haven't already emitted the copy for this block. + // This can happen because PHI nodes may have multiple entries for the same + // basic block. + if (!MBBsInsertedInto.insert(&opBlock).second) + continue; // If the copy has already been emitted, we're done. + + // Find a safe location to insert the copy, this may be the first terminator + // in the block (or end()). + MachineBasicBlock::iterator InsertPos = + findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); + + // Insert the copy. 
+ MachineInstr *NewSrcInstr = nullptr; + if (!reusedIncoming && IncomingReg) { + if (SrcUndef) { + // The source register is undefined, so there is no need for a real + // COPY, but we still need to ensure joint dominance by defs. + // Insert an IMPLICIT_DEF instruction. + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), + IncomingReg); + + // Clean up the old implicit-def, if there even was one. + if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) + if (DefMI->isImplicitDef()) + ImpDefs.insert(DefMI); + } else { + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); + } + } + + // We only need to update the LiveVariables kill of SrcReg if this was the + // last PHI use of SrcReg to be lowered on this CFG edge and it is not live + // out of the predecessor. We can also ignore undef sources. + if (LV && !SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] && + !LV->isLiveOut(SrcReg, opBlock)) { + // We want to be able to insert a kill of the register if this PHI (aka, + // the copy we just inserted) is the last use of the source value. Live + // variable analysis conservatively handles this by saying that the value + // is live until the end of the block the PHI entry lives in. If the value + // really is dead at the PHI copy, there will be no successor blocks which + // have the value live-in. + + // Okay, if we now know that the value is not live out of the block, we + // can add a kill marker in this block saying that it kills the incoming + // value! + + // In our final twist, we have to decide which instruction kills the + // register. In most cases this is the copy, however, terminator + // instructions at the end of the block may also use the value. In this + // case, we should mark the last such terminator as being the killing + // block, not the copy. + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't insert a copy this time. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = std::prev(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction"); + + // Finally, mark it killed. + LV->addVirtualRegisterKilled(SrcReg, KillInst); + + // This vreg no longer lives all of the way through opBlock. 
+ unsigned opBlockNum = opBlock.getNumber(); + LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); + } + + if (LIS) { + if (NewSrcInstr) { + LIS->InsertMachineInstrInMaps(NewSrcInstr); + LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); + } + + if (!SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { + LiveInterval &SrcLI = LIS->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + SE = opBlock.succ_end(); SI != SE; ++SI) { + SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); + + // Definitions by other PHIs are not truly live-in for our purposes. + if (VNI && VNI->def != startIdx) { + isLiveOut = true; + break; + } + } + + if (!isLiveOut) { + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't just insert a copy. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = std::prev(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && + "Cannot find kill instruction"); + + SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SrcLI.removeSegment(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); + } + } + } + } + + // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. + if (reusedIncoming || !IncomingReg) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MPhi); + MF.DeleteMachineInstr(MPhi); + } +} + +/// analyzePHINodes - Gather information about the PHI nodes in here. In +/// particular, we want to map the number of uses of a virtual register which is +/// used in a PHI node. We map that to the BB the vreg is coming from. This is +/// used later to determine when the vreg is killed in the BB. +/// +void PHIElimination::analyzePHINodes(const MachineFunction& MF) { + for (const auto &MBB : MF) + for (const auto &BBI : MBB) { + if (!BBI.isPHI()) + break; + for (unsigned i = 1, e = BBI.getNumOperands(); i != e; i += 2) + ++VRegPHIUseCount[BBVRegPair(BBI.getOperand(i+1).getMBB()->getNumber(), + BBI.getOperand(i).getReg())]; + } +} + +bool PHIElimination::SplitPHIEdges(MachineFunction &MF, + MachineBasicBlock &MBB, + MachineLoopInfo *MLI) { + if (MBB.empty() || !MBB.front().isPHI() || MBB.isEHPad()) + return false; // Quick exit for basic blocks without PHIs. + + const MachineLoop *CurLoop = MLI ? MLI->getLoopFor(&MBB) : nullptr; + bool IsLoopHeader = CurLoop && &MBB == CurLoop->getHeader(); + + bool Changed = false; + for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end(); + BBI != BBE && BBI->isPHI(); ++BBI) { + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { + unsigned Reg = BBI->getOperand(i).getReg(); + MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB(); + // Is there a critical edge from PreMBB to MBB? + if (PreMBB->succ_size() == 1) + continue; + + // Avoid splitting backedges of loops. It would introduce small + // out-of-line blocks into the loop which is very bad for code placement. 
+ if (PreMBB == &MBB && !SplitAllCriticalEdges) + continue; + const MachineLoop *PreLoop = MLI ? MLI->getLoopFor(PreMBB) : nullptr; + if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) + continue; + + // LV doesn't consider a phi use live-out, so isLiveOut only returns true + // when the source register is live-out for some other reason than a phi + // use. That means the copy we will insert in PreMBB won't be a kill, and + // there is a risk it may not be coalesced away. + // + // If the copy would be a kill, there is no need to split the edge. + bool ShouldSplit = isLiveOutPastPHIs(Reg, PreMBB); + if (!ShouldSplit && !NoPhiElimLiveOutEarlyExit) + continue; + if (ShouldSplit) { + DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" + << PreMBB->getNumber() << " -> BB#" << MBB.getNumber() + << ": " << *BBI); + } + + // If Reg is not live-in to MBB, it means it must be live-in to some + // other PreMBB successor, and we can avoid the interference by splitting + // the edge. + // + // If Reg *is* live-in to MBB, the interference is inevitable and a copy + // is likely to be left after coalescing. If we are looking at a loop + // exiting edge, split it so we won't insert code in the loop, otherwise + // don't bother. + ShouldSplit = ShouldSplit && !isLiveIn(Reg, &MBB); + + // Check for a loop exiting edge. + if (!ShouldSplit && CurLoop != PreLoop) { + DEBUG({ + dbgs() << "Split wouldn't help, maybe avoid loop copies?\n"; + if (PreLoop) dbgs() << "PreLoop: " << *PreLoop; + if (CurLoop) dbgs() << "CurLoop: " << *CurLoop; + }); + // This edge could be entering a loop, exiting a loop, or it could be + // both: Jumping directly form one loop to the header of a sibling + // loop. + // Split unless this edge is entering CurLoop from an outer loop. + ShouldSplit = PreLoop && !PreLoop->contains(CurLoop); + } + if (!ShouldSplit && !SplitAllCriticalEdges) + continue; + if (!PreMBB->SplitCriticalEdge(&MBB, this)) { + DEBUG(dbgs() << "Failed to split critical edge.\n"); + continue; + } + Changed = true; + ++NumCriticalEdgesSplit; + } + } + return Changed; +} + +bool PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveIn() requires either LiveVariables or LiveIntervals"); + if (LIS) + return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB); + else + return LV->isLiveIn(Reg, *MBB); +} + +bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, + const MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); + // LiveVariables considers uses in PHIs to be in the predecessor basic block, + // so that a register used only in a PHI is not live out of the block. In + // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than + // in the predecessor basic block, so that a register used only in a PHI is live + // out of the block. 
+ if (LIS) { + const LiveInterval &LI = LIS->getInterval(Reg); + for (const MachineBasicBlock *SI : MBB->successors()) + if (LI.liveAt(LIS->getMBBStartIdx(SI))) + return true; + return false; + } else { + return LV->isLiveOut(Reg, *MBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp new file mode 100644 index 0000000..4cabc3a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp @@ -0,0 +1,59 @@ +//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "PHIEliminationUtils.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +using namespace llvm; + +// findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg +// when following the CFG edge to SuccMBB. This needs to be after any def of +// SrcReg, but before any subsequent point where control flow might jump out of +// the basic block. +MachineBasicBlock::iterator +llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg) { + // Handle the trivial case trivially. + if (MBB->empty()) + return MBB->begin(); + + // Usually, we just want to insert the copy before the first terminator + // instruction. However, for the edge going to a landing pad, we must insert + // the copy before the call/invoke instruction. + if (!SuccMBB->isEHPad()) + return MBB->getFirstTerminator(); + + // Discover any defs/uses in this basic block. + SmallPtrSet<MachineInstr*, 8> DefUsesInMBB; + MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo(); + for (MachineInstr &RI : MRI.reg_instructions(SrcReg)) { + if (RI.getParent() == MBB) + DefUsesInMBB.insert(&RI); + } + + MachineBasicBlock::iterator InsertPoint; + if (DefUsesInMBB.empty()) { + // No defs. Insert the copy at the start of the basic block. + InsertPoint = MBB->begin(); + } else if (DefUsesInMBB.size() == 1) { + // Insert the copy immediately after the def/use. + InsertPoint = *DefUsesInMBB.begin(); + ++InsertPoint; + } else { + // Insert the copy immediately after the last def/use. + InsertPoint = MBB->end(); + while (!DefUsesInMBB.count(&*--InsertPoint)) {} + ++InsertPoint; + } + + // Make sure the copy goes after any phi nodes however. + return MBB->SkipPHIsAndLabels(InsertPoint); +} diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h new file mode 100644 index 0000000..b997d7a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h @@ -0,0 +1,25 @@ +//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H +#define LLVM_LIB_CODEGEN_PHIELIMINATIONUTILS_H + +#include "llvm/CodeGen/MachineBasicBlock.h" + +namespace llvm { + /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from + /// SrcReg when following the CFG edge to SuccMBB. 
This needs to be after + /// any def of SrcReg, but before any subsequent point where control flow + /// might jump out of the basic block. + MachineBasicBlock::iterator + findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB, + unsigned SrcReg); +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp new file mode 100644 index 0000000..e73ba02 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp @@ -0,0 +1,96 @@ +//===-- ParallelCG.cpp ----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines functions that can be used for parallel code generation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ParallelCG.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/thread.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/Utils/SplitModule.h" + +using namespace llvm; + +static void codegen(Module *M, llvm::raw_pwrite_stream &OS, + const Target *TheTarget, StringRef CPU, StringRef Features, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + std::unique_ptr<TargetMachine> TM(TheTarget->createTargetMachine( + M->getTargetTriple(), CPU, Features, Options, RM, CM, OL)); + + legacy::PassManager CodeGenPasses; + if (TM->addPassesToEmitFile(CodeGenPasses, OS, FileType)) + report_fatal_error("Failed to setup codegen"); + CodeGenPasses.run(*M); +} + +std::unique_ptr<Module> +llvm::splitCodeGen(std::unique_ptr<Module> M, + ArrayRef<llvm::raw_pwrite_stream *> OSs, StringRef CPU, + StringRef Features, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, + TargetMachine::CodeGenFileType FileType) { + StringRef TripleStr = M->getTargetTriple(); + std::string ErrMsg; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleStr, ErrMsg); + if (!TheTarget) + report_fatal_error(Twine("Target not found: ") + ErrMsg); + + if (OSs.size() == 1) { + codegen(M.get(), *OSs[0], TheTarget, CPU, Features, Options, RM, CM, + OL, FileType); + return M; + } + + std::vector<thread> Threads; + SplitModule(std::move(M), OSs.size(), [&](std::unique_ptr<Module> MPart) { + // We want to clone the module in a new context to multi-thread the codegen. + // We do it by serializing partition modules to bitcode (while still on the + // main thread, in order to avoid data races) and spinning up new threads + // which deserialize the partitions into separate contexts. + // FIXME: Provide a more direct way to do this in LLVM. 
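// The hand-off pattern described above -- serialize on the main thread, move
// the buffer into the worker, rebuild thread-local state there -- in a
// minimal standard-library-only sketch (splitWork and its names are
// illustrative, not LLVM API):

#include <string>
#include <thread>
#include <utility>
#include <vector>

static void splitWork(const std::vector<std::string> &Parts) {
  std::vector<std::thread> Workers;
  for (const std::string &Part : Parts) {
    std::string Buf = Part;          // "serialize" on the main thread
    Workers.emplace_back(
        [](std::string Local) {
          // Deserialize Local into per-thread state and process it here;
          // nothing is shared with the main thread, so no data races.
          (void)Local;
        },
        std::move(Buf));             // move the buffer into the worker
  }
  for (std::thread &T : Workers)
    T.join();
}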
+ SmallVector<char, 0> BC; + raw_svector_ostream BCOS(BC); + WriteBitcodeToFile(MPart.get(), BCOS); + + llvm::raw_pwrite_stream *ThreadOS = OSs[Threads.size()]; + Threads.emplace_back( + [TheTarget, CPU, Features, Options, RM, CM, OL, FileType, + ThreadOS](const SmallVector<char, 0> &BC) { + LLVMContext Ctx; + ErrorOr<std::unique_ptr<Module>> MOrErr = + parseBitcodeFile(MemoryBufferRef(StringRef(BC.data(), BC.size()), + "<split-module>"), + Ctx); + if (!MOrErr) + report_fatal_error("Failed to read bitcode"); + std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get()); + + codegen(MPartInCtx.get(), *ThreadOS, TheTarget, CPU, Features, + Options, RM, CM, OL, FileType); + }, + // Pass BC using std::move to ensure that it get moved rather than + // copied into the thread's context. + std::move(BC)); + }); + + for (thread &T : Threads) + T.join(); + + return {}; +} diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp new file mode 100644 index 0000000..873f712 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -0,0 +1,817 @@ +//===-- Passes.cpp - Target independent code generation passes ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines interfaces to access the target independent code +// generation passes provided by the LLVM backend. +// +//===---------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CFLAliasAnalysis.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/SymbolRewriter.h" + +using namespace llvm; + +static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, + cl::desc("Disable Post Regalloc")); +static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, + cl::desc("Disable branch folding")); +static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); +static cl::opt<bool> DisableEarlyTailDup("disable-early-taildup", cl::Hidden, + cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt<bool> DisableBlockPlacement("disable-block-placement", + cl::Hidden, cl::desc("Disable probability-driven block placement")); +static cl::opt<bool> EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); +static cl::opt<bool> DisableSSC("disable-ssc", cl::Hidden, + cl::desc("Disable Stack Slot Coloring")); +static cl::opt<bool> DisableMachineDCE("disable-machine-dce", cl::Hidden, + cl::desc("Disable Machine Dead Code Elimination")); +static cl::opt<bool> DisableEarlyIfConversion("disable-early-ifcvt", cl::Hidden, + cl::desc("Disable Early 
If-conversion")); +static cl::opt<bool> DisableMachineLICM("disable-machine-licm", cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineCSE("disable-machine-cse", cl::Hidden, + cl::desc("Disable Machine Common Subexpression Elimination")); +static cl::opt<cl::boolOrDefault> OptimizeRegAlloc( + "optimize-regalloc", cl::Hidden, + cl::desc("Enable optimized register allocation compilation path.")); +static cl::opt<bool> DisablePostRAMachineLICM("disable-postra-machine-licm", + cl::Hidden, + cl::desc("Disable Machine LICM")); +static cl::opt<bool> DisableMachineSink("disable-machine-sink", cl::Hidden, + cl::desc("Disable Machine Sinking")); +static cl::opt<bool> DisableLSR("disable-lsr", cl::Hidden, + cl::desc("Disable Loop Strength Reduction Pass")); +static cl::opt<bool> DisableConstantHoisting("disable-constant-hoisting", + cl::Hidden, cl::desc("Disable ConstantHoisting")); +static cl::opt<bool> DisableCGP("disable-cgp", cl::Hidden, + cl::desc("Disable Codegen Prepare")); +static cl::opt<bool> DisableCopyProp("disable-copyprop", cl::Hidden, + cl::desc("Disable Copy Propagation pass")); +static cl::opt<bool> DisablePartialLibcallInlining("disable-partial-libcall-inlining", + cl::Hidden, cl::desc("Disable Partial Libcall Inlining")); +static cl::opt<bool> EnableImplicitNullChecks( + "enable-implicit-null-checks", + cl::desc("Fold null checks into faulting memory operations"), + cl::init(false)); +static cl::opt<bool> PrintLSR("print-lsr-output", cl::Hidden, + cl::desc("Print LLVM IR produced by the loop-reduce pass")); +static cl::opt<bool> PrintISelInput("print-isel-input", cl::Hidden, + cl::desc("Print LLVM IR input to isel pass")); +static cl::opt<bool> PrintGCInfo("print-gc", cl::Hidden, + cl::desc("Dump garbage collector data")); +static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden, + cl::desc("Verify generated machine code"), + cl::init(false), + cl::ZeroOrMore); + +static cl::opt<std::string> +PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, + cl::desc("Print machine instrs"), + cl::value_desc("pass-name"), cl::init("option-unspecified")); + +// Temporary option to allow experimenting with MachineScheduler as a post-RA +// scheduler. Targets can "properly" enable this with +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). +// Targets can return true in targetSchedulesPostRAScheduling() and +// insert a PostRA scheduling pass wherever it wants. +cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, + cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); + +// Experimental option to run live interval analysis early. +static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden, + cl::desc("Run live interval analysis earlier in the pipeline")); + +static cl::opt<bool> UseCFLAA("use-cfl-aa-in-codegen", + cl::init(false), cl::Hidden, + cl::desc("Enable the new, experimental CFL alias analysis in CodeGen")); + +/// Allow standard passes to be disabled by command line options. This supports +/// simple binary flags that either suppress the pass or do nothing. +/// i.e. -disable-mypass=false has no effect. +/// These should be converted to boolOrDefault in order to use applyOverride. +static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID, + bool Override) { + if (Override) + return IdentifyingPassPtr(); + return PassID; +} + +/// Allow standard passes to be disabled by the command line, regardless of who +/// is adding the pass. 
+/// +/// StandardID is the pass identified in the standard pass pipeline and provided +/// to addPass(). It may be a target-specific ID in the case that the target +/// directly adds its own pass, but in that case we harmlessly fall through. +/// +/// TargetID is the pass that the target has configured to override StandardID. +/// +/// StandardID may be a pseudo ID. In that case TargetID is the name of the real +/// pass to run. This allows multiple options to control a single pass depending +/// on where in the pipeline that pass is added. +static IdentifyingPassPtr overridePass(AnalysisID StandardID, + IdentifyingPassPtr TargetID) { + if (StandardID == &PostRASchedulerID) + return applyDisable(TargetID, DisablePostRA); + + if (StandardID == &BranchFolderPassID) + return applyDisable(TargetID, DisableBranchFold); + + if (StandardID == &TailDuplicateID) + return applyDisable(TargetID, DisableTailDuplicate); + + if (StandardID == &TargetPassConfig::EarlyTailDuplicateID) + return applyDisable(TargetID, DisableEarlyTailDup); + + if (StandardID == &MachineBlockPlacementID) + return applyDisable(TargetID, DisableBlockPlacement); + + if (StandardID == &StackSlotColoringID) + return applyDisable(TargetID, DisableSSC); + + if (StandardID == &DeadMachineInstructionElimID) + return applyDisable(TargetID, DisableMachineDCE); + + if (StandardID == &EarlyIfConverterID) + return applyDisable(TargetID, DisableEarlyIfConversion); + + if (StandardID == &MachineLICMID) + return applyDisable(TargetID, DisableMachineLICM); + + if (StandardID == &MachineCSEID) + return applyDisable(TargetID, DisableMachineCSE); + + if (StandardID == &TargetPassConfig::PostRAMachineLICMID) + return applyDisable(TargetID, DisablePostRAMachineLICM); + + if (StandardID == &MachineSinkingID) + return applyDisable(TargetID, DisableMachineSink); + + if (StandardID == &MachineCopyPropagationID) + return applyDisable(TargetID, DisableCopyProp); + + return TargetID; +} + +//===---------------------------------------------------------------------===// +/// TargetPassConfig +//===---------------------------------------------------------------------===// + +INITIALIZE_PASS(TargetPassConfig, "targetpassconfig", + "Target Pass Configuration", false, false) +char TargetPassConfig::ID = 0; + +// Pseudo Pass IDs. +char TargetPassConfig::EarlyTailDuplicateID = 0; +char TargetPassConfig::PostRAMachineLICMID = 0; + +namespace { +struct InsertedPass { + AnalysisID TargetPassID; + IdentifyingPassPtr InsertedPassID; + bool VerifyAfter; + bool PrintAfter; + + InsertedPass(AnalysisID TargetPassID, IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) + : TargetPassID(TargetPassID), InsertedPassID(InsertedPassID), + VerifyAfter(VerifyAfter), PrintAfter(PrintAfter) {} + + Pass *getInsertedPass() const { + assert(InsertedPassID.isValid() && "Illegal Pass ID!"); + if (InsertedPassID.isInstance()) + return InsertedPassID.getInstance(); + Pass *NP = Pass::createPass(InsertedPassID.getID()); + assert(NP && "Pass ID not registered"); + return NP; + } +}; +} + +namespace llvm { +class PassConfigImpl { +public: + // List of passes explicitly substituted by this target. Normally this is + // empty, but it is a convenient way to suppress or replace specific passes + // that are part of a standard pass pipeline without overridding the entire + // pipeline. This mechanism allows target options to inherit a standard pass's + // user interface. 
For example, a target may disable a standard pass by + // default by substituting a pass ID of zero, and the user may still enable + // that standard pass with an explicit command line option. + DenseMap<AnalysisID,IdentifyingPassPtr> TargetPasses; + + /// Store the pairs of <AnalysisID, AnalysisID> of which the second pass + /// is inserted after each instance of the first one. + SmallVector<InsertedPass, 4> InsertedPasses; +}; +} // namespace llvm + +// Out of line virtual method. +TargetPassConfig::~TargetPassConfig() { + delete Impl; +} + +// Out of line constructor provides default values for pass options and +// registers all common codegen passes. +TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) + : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr), + StopAfter(nullptr), Started(true), Stopped(false), + AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false), + DisableVerify(false), EnableTailMerge(true) { + + Impl = new PassConfigImpl(); + + // Register all target independent codegen passes to activate their PassIDs, + // including this pass itself. + initializeCodeGen(*PassRegistry::getPassRegistry()); + + // Also register alias analysis passes required by codegen passes. + initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + + // Substitute Pseudo Pass IDs for real ones. + substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); + substitutePass(&PostRAMachineLICMID, &MachineLICMID); +} + +/// Insert InsertedPassID pass after TargetPassID. +void TargetPassConfig::insertPass(AnalysisID TargetPassID, + IdentifyingPassPtr InsertedPassID, + bool VerifyAfter, bool PrintAfter) { + assert(((!InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getID()) || + (InsertedPassID.isInstance() && + TargetPassID != InsertedPassID.getInstance()->getPassID())) && + "Insert a pass after itself!"); + Impl->InsertedPasses.emplace_back(TargetPassID, InsertedPassID, VerifyAfter, + PrintAfter); +} + +/// createPassConfig - Create a pass configuration object to be used by +/// addPassToEmitX methods for generating a pipeline of CodeGen passes. +/// +/// Targets may override this to extend TargetPassConfig. +TargetPassConfig *LLVMTargetMachine::createPassConfig(PassManagerBase &PM) { + return new TargetPassConfig(this, PM); +} + +TargetPassConfig::TargetPassConfig() + : ImmutablePass(ID), PM(nullptr) { + llvm_unreachable("TargetPassConfig should not be constructed on-the-fly"); +} + +// Helper to verify the analysis is really immutable. +void TargetPassConfig::setOpt(bool &Opt, bool Val) { + assert(!Initialized && "PassConfig is immutable"); + Opt = Val; +} + +void TargetPassConfig::substitutePass(AnalysisID StandardID, + IdentifyingPassPtr TargetID) { + Impl->TargetPasses[StandardID] = TargetID; +} + +IdentifyingPassPtr TargetPassConfig::getPassSubstitution(AnalysisID ID) const { + DenseMap<AnalysisID, IdentifyingPassPtr>::const_iterator + I = Impl->TargetPasses.find(ID); + if (I == Impl->TargetPasses.end()) + return ID; + return I->second; +} + +/// Add a pass to the PassManager if that pass is supposed to be run. If the +/// Started/Stopped flags indicate either that the compilation should start at +/// a later pass or that it should stop after an earlier pass, then do not add +/// the pass. Finally, compare the current pass against the StartAfter +/// and StopAfter options and change the Started/Stopped flags accordingly. 
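// Taken together, substitutePass, insertPass and the Start/Stop gating in
// addPass below are the hooks a target's pass config uses to adjust the
// standard pipeline. A sketch of typical use from a hypothetical target
// (MyTargetPassConfig and MyTargetExpandPseudosID are illustrative, not part
// of the imported sources; the hooks themselves are the ones defined in this
// file):
static char MyTargetExpandPseudosID; // hypothetical target pass ID

class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // Run the post-RA MachineScheduler instead of the classic post-RA
    // scheduler, as suggested by the -misched-postra comment earlier in
    // this file.
    substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
    // Schedule the (hypothetical) target pass right after PHI elimination.
    insertPass(&PHIEliminationID, &MyTargetExpandPseudosID);
  }
};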
+void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) { + assert(!Initialized && "PassConfig is immutable"); + + // Cache the Pass ID here in case the pass manager finds this pass is + // redundant with ones already scheduled / available, and deletes it. + // Fundamentally, once we add the pass to the manager, we no longer own it + // and shouldn't reference it. + AnalysisID PassID = P->getPassID(); + + if (StartBefore == PassID) + Started = true; + if (Started && !Stopped) { + std::string Banner; + // Construct banner message before PM->add() as that may delete the pass. + if (AddingMachinePasses && (printAfter || verifyAfter)) + Banner = std::string("After ") + std::string(P->getPassName()); + PM->add(P); + if (AddingMachinePasses) { + if (printAfter) + addPrintPass(Banner); + if (verifyAfter) + addVerifyPass(Banner); + } + + // Add the passes after the pass P if there is any. + for (auto IP : Impl->InsertedPasses) { + if (IP.TargetPassID == PassID) + addPass(IP.getInsertedPass(), IP.VerifyAfter, IP.PrintAfter); + } + } else { + delete P; + } + if (StopAfter == PassID) + Stopped = true; + if (StartAfter == PassID) + Started = true; + if (Stopped && !Started) + report_fatal_error("Cannot stop compilation after pass that is not run"); +} + +/// Add a CodeGen pass at this point in the pipeline after checking for target +/// and command line overrides. +/// +/// addPass cannot return a pointer to the pass instance because is internal the +/// PassManager and the instance we create here may already be freed. +AnalysisID TargetPassConfig::addPass(AnalysisID PassID, bool verifyAfter, + bool printAfter) { + IdentifyingPassPtr TargetID = getPassSubstitution(PassID); + IdentifyingPassPtr FinalPtr = overridePass(PassID, TargetID); + if (!FinalPtr.isValid()) + return nullptr; + + Pass *P; + if (FinalPtr.isInstance()) + P = FinalPtr.getInstance(); + else { + P = Pass::createPass(FinalPtr.getID()); + if (!P) + llvm_unreachable("Pass ID not registered"); + } + AnalysisID FinalID = P->getPassID(); + addPass(P, verifyAfter, printAfter); // Ends the lifetime of P. + + return FinalID; +} + +void TargetPassConfig::printAndVerify(const std::string &Banner) { + addPrintPass(Banner); + addVerifyPass(Banner); +} + +void TargetPassConfig::addPrintPass(const std::string &Banner) { + if (TM->shouldPrintMachineCode()) + PM->add(createMachineFunctionPrinterPass(dbgs(), Banner)); +} + +void TargetPassConfig::addVerifyPass(const std::string &Banner) { + if (VerifyMachineCode) + PM->add(createMachineVerifierPass(Banner)); +} + +/// Add common target configurable passes that perform LLVM IR to IR transforms +/// following machine independent optimization. +void TargetPassConfig::addIRPasses() { + // Basic AliasAnalysis support. + // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that + // BasicAliasAnalysis wins if they disagree. This is intended to help + // support "obvious" type-punning idioms. + if (UseCFLAA) + addPass(createCFLAAWrapperPass()); + addPass(createTypeBasedAAWrapperPass()); + addPass(createScopedNoAliasAAWrapperPass()); + addPass(createBasicAAWrapperPass()); + + // Before running any passes, run the verifier to determine if the input + // coming from the front-end and/or optimizer is valid. + if (!DisableVerify) + addPass(createVerifierPass()); + + // Run loop strength reduction before anything else. 
+ if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { + addPass(createLoopStrengthReducePass()); + if (PrintLSR) + addPass(createPrintFunctionPass(dbgs(), "\n\n*** Code after LSR ***\n")); + } + + // Run GC lowering passes for builtin collectors + // TODO: add a pass insertion point here + addPass(createGCLoweringPass()); + addPass(createShadowStackGCLoweringPass()); + + // Make sure that no unreachable blocks are instruction selected. + addPass(createUnreachableBlockEliminationPass()); + + // Prepare expensive constants for SelectionDAG. + if (getOptLevel() != CodeGenOpt::None && !DisableConstantHoisting) + addPass(createConstantHoistingPass()); + + if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining) + addPass(createPartiallyInlineLibCallsPass()); +} + +/// Turn exception handling constructs into something the code generators can +/// handle. +void TargetPassConfig::addPassesToHandleExceptions() { + switch (TM->getMCAsmInfo()->getExceptionHandlingType()) { + case ExceptionHandling::SjLj: + // SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both + // Dwarf EH prepare needs to be run after SjLj prepare. Otherwise, + // catch info can get misplaced when a selector ends up more than one block + // removed from the parent invoke(s). This could happen when a landing + // pad is shared by multiple invokes and is also a target of a normal + // edge from elsewhere. + addPass(createSjLjEHPreparePass()); + // FALLTHROUGH + case ExceptionHandling::DwarfCFI: + case ExceptionHandling::ARM: + addPass(createDwarfEHPass(TM)); + break; + case ExceptionHandling::WinEH: + // We support using both GCC-style and MSVC-style exceptions on Windows, so + // add both preparation passes. Each pass will only actually run if it + // recognizes the personality function. + addPass(createWinEHPass(TM)); + addPass(createDwarfEHPass(TM)); + break; + case ExceptionHandling::None: + addPass(createLowerInvokePass()); + + // The lower invoke pass may create unreachable code. Remove it. + addPass(createUnreachableBlockEliminationPass()); + break; + } +} + +/// Add pass to prepare the LLVM IR for code generation. This should be done +/// before exception handling preparation passes. +void TargetPassConfig::addCodeGenPrepare() { + if (getOptLevel() != CodeGenOpt::None && !DisableCGP) + addPass(createCodeGenPreparePass(TM)); + addPass(createRewriteSymbolsPass()); +} + +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +void TargetPassConfig::addISelPrepare() { + addPreISel(); + + // Add both the safe stack and the stack protection passes: each of them will + // only protect functions that have corresponding attributes. + addPass(createSafeStackPass(TM)); + addPass(createStackProtectorPass(TM)); + + if (PrintISelInput) + addPass(createPrintFunctionPass( + dbgs(), "\n\n*** Final LLVM Code input to ISel ***\n")); + + // All passes which modify the LLVM IR are now complete; run the verifier + // to ensure that the IR is valid. + if (!DisableVerify) + addPass(createVerifierPass()); +} + +/// Add the complete set of target-independent postISel code generator passes. +/// +/// This can be read as the standard order of major LLVM CodeGen stages. Stages +/// with nontrivial configuration or multiple passes are broken out below in +/// add%Stage routines. +/// +/// Any TargetPassConfig::addXX routine may be overriden by the Target. 
The +/// addPre/Post methods with empty header implementations allow injecting +/// target-specific fixups just before or after major stages. Additionally, +/// targets have the flexibility to change pass order within a stage by +/// overriding default implementation of add%Stage routines below. Each +/// technique has maintainability tradeoffs because alternate pass orders are +/// not well supported. addPre/Post works better if the target pass is easily +/// tied to a common pass. But if it has subtle dependencies on multiple passes, +/// the target should override the stage instead. +/// +/// TODO: We could use a single addPre/Post(ID) hook to allow pass injection +/// before/after any target-independent pass. But it's currently overkill. +void TargetPassConfig::addMachinePasses() { + AddingMachinePasses = true; + + // Insert a machine instr printer pass after the specified pass. + // If -print-machineinstrs specified, print machineinstrs after all passes. + if (StringRef(PrintMachineInstrs.getValue()).equals("")) + TM->Options.PrintMachineCode = true; + else if (!StringRef(PrintMachineInstrs.getValue()) + .equals("option-unspecified")) { + const PassRegistry *PR = PassRegistry::getPassRegistry(); + const PassInfo *TPI = PR->getPassInfo(PrintMachineInstrs.getValue()); + const PassInfo *IPI = PR->getPassInfo(StringRef("machineinstr-printer")); + assert (TPI && IPI && "Pass ID not registered!"); + const char *TID = (const char *)(TPI->getTypeInfo()); + const char *IID = (const char *)(IPI->getTypeInfo()); + insertPass(TID, IID); + } + + // Print the instruction selected machine code... + printAndVerify("After Instruction Selection"); + + // Expand pseudo-instructions emitted by ISel. + addPass(&ExpandISelPseudosID); + + // Add passes that optimize machine instructions in SSA form. + if (getOptLevel() != CodeGenOpt::None) { + addMachineSSAOptimization(); + } else { + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(&LocalStackSlotAllocationID, false); + } + + // Run pre-ra passes. + addPreRegAlloc(); + + // Run register allocation and passes that are tightly coupled with it, + // including phi elimination and scheduling. + if (getOptimizeRegAlloc()) + addOptimizedRegAlloc(createRegAllocPass(true)); + else + addFastRegAlloc(createRegAllocPass(false)); + + // Run post-ra passes. + addPostRegAlloc(); + + // Insert prolog/epilog code. Eliminate abstract frame index references... + if (getOptLevel() != CodeGenOpt::None) + addPass(&ShrinkWrapID); + + addPass(&PrologEpilogCodeInserterID); + + /// Add passes that optimize machine instructions after register allocation. + if (getOptLevel() != CodeGenOpt::None) + addMachineLateOptimization(); + + // Expand pseudo instructions before second scheduling pass. + addPass(&ExpandPostRAPseudosID); + + // Run pre-sched2 passes. + addPreSched2(); + + if (EnableImplicitNullChecks) + addPass(&ImplicitNullChecksID); + + // Second pass scheduler. + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM->targetSchedulesPostRAScheduling()) { + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); + } + + // GC + if (addGCPasses()) { + if (PrintGCInfo) + addPass(createGCInfoPrinter(dbgs()), false, false); + } + + // Basic block placement. 
+ if (getOptLevel() != CodeGenOpt::None) + addBlockPlacement(); + + addPreEmitPass(); + + addPass(&FuncletLayoutID, false); + + addPass(&StackMapLivenessID, false); + addPass(&LiveDebugValuesID, false); + + AddingMachinePasses = false; +} + +/// Add passes that optimize machine instructions in SSA form. +void TargetPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. + addPass(&EarlyTailDuplicateID); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(&OptimizePHIsID, false); + + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID, false); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(&LocalStackSlotAllocationID, false); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(&DeadMachineInstructionElimID); + + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + addILPOpts(); + + addPass(&MachineLICMID, false); + addPass(&MachineCSEID, false); + addPass(&MachineSinkingID); + + addPass(&PeepholeOptimizerID); + // Clean-up the dead code that may have been generated by peephole + // rewriting. + addPass(&DeadMachineInstructionElimID); +} + +//===---------------------------------------------------------------------===// +/// Register Allocation Pass Configuration +//===---------------------------------------------------------------------===// + +bool TargetPassConfig::getOptimizeRegAlloc() const { + switch (OptimizeRegAlloc) { + case cl::BOU_UNSET: return getOptLevel() != CodeGenOpt::None; + case cl::BOU_TRUE: return true; + case cl::BOU_FALSE: return false; + } + llvm_unreachable("Invalid optimize-regalloc state"); +} + +/// RegisterRegAlloc's global Registry tracks allocator registration. +MachinePassRegistry RegisterRegAlloc::Registry; + +/// A dummy default pass factory indicates whether the register allocator is +/// overridden on the command line. +static FunctionPass *useDefaultRegisterAllocator() { return nullptr; } +static RegisterRegAlloc +defaultRegAlloc("default", + "pick register allocator based on -O option", + useDefaultRegisterAllocator); + +/// -regalloc=... command line option. +static cl::opt<RegisterRegAlloc::FunctionPassCtor, false, + RegisterPassParser<RegisterRegAlloc> > +RegAlloc("regalloc", + cl::init(&useDefaultRegisterAllocator), + cl::desc("Register allocator to use")); + + +/// Instantiate the default register allocator pass for this target for either +/// the optimized or unoptimized allocation path. This will be added to the pass +/// manager by addFastRegAlloc in the unoptimized case or addOptimizedRegAlloc +/// in the optimized case. +/// +/// A target that uses the standard regalloc pass order for fast or optimized +/// allocation may still override this for per-target regalloc +/// selection. But -regalloc=... always takes precedence. 
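// A sketch of the per-target override described above (MyTargetPassConfig is
// hypothetical and not part of the imported sources). Because
// createRegAllocPass below consults the -regalloc= registry first, an
// explicit -regalloc=... on the command line still takes precedence over
// this hook:
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {}

  FunctionPass *createTargetRegisterAllocator(bool Optimized) override {
    // This hypothetical target prefers the fast allocator even when the
    // optimized regalloc pipeline is selected.
    return createFastRegisterAllocator();
  }
};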
+FunctionPass *TargetPassConfig::createTargetRegisterAllocator(bool Optimized) { + if (Optimized) + return createGreedyRegisterAllocator(); + else + return createFastRegisterAllocator(); +} + +/// Find and instantiate the register allocation pass requested by this target +/// at the current optimization level. Different register allocators are +/// defined as separate passes because they may require different analysis. +/// +/// This helper ensures that the regalloc= option is always available, +/// even for targets that override the default allocator. +/// +/// FIXME: When MachinePassRegistry register pass IDs instead of function ptrs, +/// this can be folded into addPass. +FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) { + RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); + + // Initialize the global default. + if (!Ctor) { + Ctor = RegAlloc; + RegisterRegAlloc::setDefault(RegAlloc); + } + if (Ctor != useDefaultRegisterAllocator) + return Ctor(); + + // With no -regalloc= override, ask the target for a regalloc pass. + return createTargetRegisterAllocator(Optimized); +} + +/// Return true if the default global register allocator is in use and +/// has not be overriden on the command line with '-regalloc=...' +bool TargetPassConfig::usingDefaultRegAlloc() const { + return RegAlloc.getNumOccurrences() == 0; +} + +/// Add the minimum set of target-independent passes that are required for +/// register allocation. No coalescing or scheduling. +void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { + addPass(&PHIEliminationID, false); + addPass(&TwoAddressInstructionPassID, false); + + if (RegAllocPass) + addPass(RegAllocPass); +} + +/// Add standard target-independent passes that are tightly coupled with +/// optimized register allocation, including coalescing, machine instruction +/// scheduling, and register allocation itself. +void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { + addPass(&ProcessImplicitDefsID, false); + + // LiveVariables currently requires pure SSA form. + // + // FIXME: Once TwoAddressInstruction pass no longer uses kill flags, + // LiveVariables can be removed completely, and LiveIntervals can be directly + // computed. (We still either need to regenerate kill flags after regalloc, or + // preferably fix the scavenger to not depend on them). + addPass(&LiveVariablesID, false); + + // Edge splitting is smarter with machine loop info. + addPass(&MachineLoopInfoID, false); + addPass(&PHIEliminationID, false); + + // Eventually, we want to run LiveIntervals before PHI elimination. + if (EarlyLiveIntervals) + addPass(&LiveIntervalsID, false); + + addPass(&TwoAddressInstructionPassID, false); + addPass(&RegisterCoalescerID); + + // PreRA instruction scheduling. + addPass(&MachineSchedulerID); + + if (RegAllocPass) { + // Add the selected register allocation pass. + addPass(RegAllocPass); + + // Allow targets to change the register assignments before rewriting. + addPreRewrite(); + + // Finally rewrite virtual registers. + addPass(&VirtRegRewriterID); + + // Perform stack slot coloring and post-ra machine LICM. + // + // FIXME: Re-enable coloring with register when it's capable of adding + // kill markers. + addPass(&StackSlotColoringID); + + // Run post-ra machine LICM to hoist reloads / remats. + // + // FIXME: can this move into MachineLateOptimization? 
+ addPass(&PostRAMachineLICMID); + } +} + +//===---------------------------------------------------------------------===// +/// Post RegAlloc Pass Configuration +//===---------------------------------------------------------------------===// + +/// Add passes that optimize machine instructions after register allocation. +void TargetPassConfig::addMachineLateOptimization() { + // Branch folding must be run after regalloc and prolog/epilog insertion. + addPass(&BranchFolderPassID); + + // Tail duplication. + // Note that duplicating tail just increases code size and degrades + // performance for targets that require Structured Control Flow. + // In addition it can also make CFG irreducible. Thus we disable it. + if (!TM->requiresStructuredCFG()) + addPass(&TailDuplicateID); + + // Copy propagation. + addPass(&MachineCopyPropagationID); +} + +/// Add standard GC passes. +bool TargetPassConfig::addGCPasses() { + addPass(&GCMachineCodeAnalysisID, false); + return true; +} + +/// Add standard basic block placement passes. +void TargetPassConfig::addBlockPlacement() { + if (addPass(&MachineBlockPlacementID, false)) { + // Run a separate pass to collect block placement statistics. + if (EnableBlockPlacementStats) + addPass(&MachineBlockPlacementStatsID); + } +} diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp new file mode 100644 index 0000000..52b42b6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -0,0 +1,1947 @@ +//===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Perform peephole optimizations on the machine code: +// +// - Optimize Extensions +// +// Optimization of sign / zero extension instructions. It may be extended to +// handle other instructions with similar properties. +// +// On some targets, some instructions, e.g. X86 sign / zero extension, may +// leave the source value in the lower part of the result. This optimization +// will replace some uses of the pre-extension value with uses of the +// sub-register of the results. +// +// - Optimize Comparisons +// +// Optimization of comparison instructions. For instance, in this code: +// +// sub r1, 1 +// cmp r1, 0 +// bz L1 +// +// If the "sub" instruction all ready sets (or could be modified to set) the +// same flag that the "cmp" instruction sets and that "bz" uses, then we can +// eliminate the "cmp" instruction. +// +// Another instance, in this code: +// +// sub r1, r3 | sub r1, imm +// cmp r3, r1 or cmp r1, r3 | cmp r1, imm +// bge L1 +// +// If the branch instruction can use flag from "sub", then we can replace +// "sub" with "subs" and eliminate the "cmp" instruction. +// +// - Optimize Loads: +// +// Loads that can be folded into a later instruction. A load is foldable +// if it loads to virtual registers and the virtual register defined has +// a single use. +// +// - Optimize Copies and Bitcast (more generally, target specific copies): +// +// Rewrite copies and bitcasts to avoid cross register bank copies +// when possible. 
+// E.g., Consider the following example, where capital and lower +// letters denote different register file: +// b = copy A <-- cross-bank copy +// C = copy b <-- cross-bank copy +// => +// b = copy A <-- cross-bank copy +// C = copy A <-- same-bank copy +// +// E.g., for bitcast: +// b = bitcast A <-- cross-bank copy +// C = bitcast b <-- cross-bank copy +// => +// b = bitcast A <-- cross-bank copy +// C = copy A <-- same-bank copy +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <utility> +using namespace llvm; + +#define DEBUG_TYPE "peephole-opt" + +// Optimize Extensions +static cl::opt<bool> +Aggressive("aggressive-ext-opt", cl::Hidden, + cl::desc("Aggressive extension optimization")); + +static cl::opt<bool> +DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), + cl::desc("Disable the peephole optimizer")); + +static cl::opt<bool> +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable advanced copy optimization")); + +static cl::opt<bool> DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + +// Limit the number of PHI instructions to process +// in PeepholeOptimizer::getNextSource. +static cl::opt<unsigned> RewritePHILimit( + "rewrite-phi-limit", cl::Hidden, cl::init(10), + cl::desc("Limit the length of PHI chains to lookup")); + +STATISTIC(NumReuse, "Number of extension results reused"); +STATISTIC(NumCmps, "Number of compares eliminated"); +STATISTIC(NumImmFold, "Number of move immediate folded"); +STATISTIC(NumLoadFold, "Number of loads folded"); +STATISTIC(NumSelects, "Number of selects optimized"); +STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); +STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); + +namespace { + class ValueTrackerResult; + + class PeepholeOptimizer : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + MachineDominatorTree *DT; // Machine dominator tree + + public: + static char ID; // Pass identification + PeepholeOptimizer() : MachineFunctionPass(ID) { + initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + if (Aggressive) { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + } + } + + /// \brief Track Def -> Use info used for rewriting copies. 
+ typedef SmallDenseMap<TargetInstrInfo::RegSubRegPair, ValueTrackerResult> + RewriteMapTy; + + private: + bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); + bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSetImpl<MachineInstr*> &LocalMIs); + bool optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool optimizeCondBranch(MachineInstr *MI); + bool optimizeCoalescableCopy(MachineInstr *MI); + bool optimizeUncoalescableCopy(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs); + bool findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap); + bool isMoveImmediate(MachineInstr *MI, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, + SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr*> &ImmDefMIs); + + /// \brief If copy instruction \p MI is a virtual register copy, track it in + /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was + /// previously seen as a copy, replace the uses of this copy with the + /// previously seen copy's destination register. + bool foldRedundantCopy(MachineInstr *MI, + SmallSet<unsigned, 4> &CopySrcRegs, + DenseMap<unsigned, MachineInstr *> &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs); + + bool isLoadFoldable(MachineInstr *MI, + SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); + + /// \brief Check whether \p MI is understood by the register coalescer + /// but may require some rewriting. + bool isCoalescableCopy(const MachineInstr &MI) { + // SubregToRegs are not interesting, because they are already register + // coalescer friendly. + return MI.isCopy() || (!DisableAdvCopyOpt && + (MI.isRegSequence() || MI.isInsertSubreg() || + MI.isExtractSubreg())); + } + + /// \brief Check whether \p MI is a copy like instruction that is + /// not recognized by the register coalescer. + bool isUncoalescableCopy(const MachineInstr &MI) { + return MI.isBitcast() || + (!DisableAdvCopyOpt && + (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())); + } + }; + + /// \brief Helper class to hold a reply for ValueTracker queries. Contains the + /// returned sources for a given search and the instructions where the sources + /// were tracked from. + class ValueTrackerResult { + private: + /// Track all sources found by one ValueTracker query. + SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; + + /// Instruction using the sources in 'RegSrcs'. 
+ const MachineInstr *Inst; + + public: + ValueTrackerResult() : Inst(nullptr) {} + ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { + addSource(Reg, SubReg); + } + + bool isValid() const { return getNumSources() > 0; } + + void setInst(const MachineInstr *I) { Inst = I; } + const MachineInstr *getInst() const { return Inst; } + + void clear() { + RegSrcs.clear(); + Inst = nullptr; + } + + void addSource(unsigned SrcReg, unsigned SrcSubReg) { + RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); + } + + void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { + assert(Idx < getNumSources() && "Reg pair source out of index"); + RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); + } + + int getNumSources() const { return RegSrcs.size(); } + + unsigned getSrcReg(int Idx) const { + assert(Idx < getNumSources() && "Reg source out of index"); + return RegSrcs[Idx].Reg; + } + + unsigned getSrcSubReg(int Idx) const { + assert(Idx < getNumSources() && "SubReg source out of index"); + return RegSrcs[Idx].SubReg; + } + + bool operator==(const ValueTrackerResult &Other) { + if (Other.getInst() != getInst()) + return false; + + if (Other.getNumSources() != getNumSources()) + return false; + + for (int i = 0, e = Other.getNumSources(); i != e; ++i) + if (Other.getSrcReg(i) != getSrcReg(i) || + Other.getSrcSubReg(i) != getSrcSubReg(i)) + return false; + return true; + } + }; + + /// \brief Helper class to track the possible sources of a value defined by + /// a (chain of) copy related instructions. + /// Given a definition (instruction and definition index), this class + /// follows the use-def chain to find successive suitable sources. + /// The given source can be used to rewrite the definition into + /// def = COPY src. + /// + /// For instance, let us consider the following snippet: + /// v0 = + /// v2 = INSERT_SUBREG v1, v0, sub0 + /// def = COPY v2.sub0 + /// + /// Using a ValueTracker for def = COPY v2.sub0 will give the following + /// suitable sources: + /// v2.sub0 and v0. + /// Then, def can be rewritten into def = COPY v0. + class ValueTracker { + private: + /// The current point into the use-def chain. + const MachineInstr *Def; + /// The index of the definition in Def. + unsigned DefIdx; + /// The sub register index of the definition. + unsigned DefSubReg; + /// The register where the value can be found. + unsigned Reg; + /// Specifiy whether or not the value tracking looks through + /// complex instructions. When this is false, the value tracker + /// bails on everything that is not a copy or a bitcast. + /// + /// Note: This could have been implemented as a specialized version of + /// the ValueTracker class but that would have complicated the code of + /// the users of this class. + bool UseAdvancedTracking; + /// MachineRegisterInfo used to perform tracking. + const MachineRegisterInfo &MRI; + /// Optional TargetInstrInfo used to perform some complex + /// tracking. + const TargetInstrInfo *TII; + + /// \brief Dispatcher to the right underlying implementation of + /// getNextSource. + ValueTrackerResult getNextSourceImpl(); + /// \brief Specialized version of getNextSource for Copy instructions. + ValueTrackerResult getNextSourceFromCopy(); + /// \brief Specialized version of getNextSource for Bitcast instructions. + ValueTrackerResult getNextSourceFromBitcast(); + /// \brief Specialized version of getNextSource for RegSequence + /// instructions. 
+ ValueTrackerResult getNextSourceFromRegSequence(); + /// \brief Specialized version of getNextSource for InsertSubreg + /// instructions. + ValueTrackerResult getNextSourceFromInsertSubreg(); + /// \brief Specialized version of getNextSource for ExtractSubreg + /// instructions. + ValueTrackerResult getNextSourceFromExtractSubreg(); + /// \brief Specialized version of getNextSource for SubregToReg + /// instructions. + ValueTrackerResult getNextSourceFromSubregToReg(); + /// \brief Specialized version of getNextSource for PHI instructions. + ValueTrackerResult getNextSourceFromPHI(); + + public: + /// \brief Create a ValueTracker instance for the value defined by \p Reg. + /// \p DefSubReg represents the sub register index the value tracker will + /// track. It does not need to match the sub register index used in the + /// definition of \p Reg. + /// \p UseAdvancedTracking specifies whether or not the value tracker looks + /// through complex instructions. By default (false), it handles only copy + /// and bitcast instructions. + /// If \p Reg is a physical register, a value tracker constructed with + /// this constructor will not find any alternative source. + /// Indeed, when \p Reg is a physical register that constructor does not + /// know which definition of \p Reg it should track. + /// Use the next constructor to track a physical register. + ValueTracker(unsigned Reg, unsigned DefSubReg, + const MachineRegisterInfo &MRI, + bool UseAdvancedTracking = false, + const TargetInstrInfo *TII = nullptr) + : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); + } + } + + /// \brief Create a ValueTracker instance for the value defined by + /// the pair \p MI, \p DefIdx. + /// Unlike the other constructor, the value tracker produced by this one + /// may be able to find a new source when the definition is a physical + /// register. + /// This could be useful to rewrite target specific instructions into + /// generic copy instructions. + ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + const MachineRegisterInfo &MRI, + bool UseAdvancedTracking = false, + const TargetInstrInfo *TII = nullptr) + : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { + assert(DefIdx < Def->getDesc().getNumDefs() && + Def->getOperand(DefIdx).isReg() && "Invalid definition"); + Reg = Def->getOperand(DefIdx).getReg(); + } + + /// \brief Following the use-def chain, get the next available source + /// for the tracked value. + /// \return A ValueTrackerResult containing a set of registers + /// and sub registers with tracked values. A ValueTrackerResult with + /// an empty set of registers means no source was found. + ValueTrackerResult getNextSource(); + + /// \brief Get the last register where the initial value can be found. + /// Initially this is the register of the definition. + /// Then, after each successful call to getNextSource, this is the + /// register of the last source. 
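As a hedged editorial sketch of how a client drives the tracker documented above (it mirrors the loop in PeepholeOptimizer::findNextSource further down in this file; Reg, SubReg, MRI and TII stand for whatever the caller already has in scope):

ValueTracker Tracker(Reg, SubReg, *MRI, /*UseAdvancedTracking=*/true, TII);
ValueTrackerResult Res = Tracker.getNextSource();
while (Res.isValid() && Res.getNumSources() == 1) {
  unsigned SrcReg = Res.getSrcReg(0);
  unsigned SrcSubReg = Res.getSrcSubReg(0);
  // Decide whether (SrcReg, SrcSubReg) is a preferable source, e.g. with
  // TRI->shouldRewriteCopySrc(), and either stop here or keep walking.
  Res = Tracker.getNextSource();
}
// More than one source means a PHI was traversed; an invalid result means the
// top of the use-def chain, or an instruction the tracker does not look
// through, was reached.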
+ unsigned getReg() const { return Reg; } + }; +} + +char PeepholeOptimizer::ID = 0; +char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; +INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", + "Peephole Optimizations", false, false) + +/// If instruction is a copy-like instruction, i.e. it reads a single register +/// and writes a single register and it does not modify the source, and if the +/// source value is preserved as a sub-register of the result, then replace all +/// reachable uses of the source with the subreg of the result. +/// +/// Do not generate an EXTRACT that is used only in a debug use, as this changes +/// the code. Since this code does not currently share EXTRACTs, just ignore all +/// debug uses. +bool PeepholeOptimizer:: +optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, + SmallPtrSetImpl<MachineInstr*> &LocalMIs) { + unsigned SrcReg, DstReg, SubIdx; + if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) + return false; + + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + if (MRI->hasOneNonDBGUse(SrcReg)) + // No other uses. + return false; + + // Ensure DstReg can get a register class that actually supports + // sub-registers. Don't change the class until we commit. + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); + if (!DstRC) + return false; + + // The ext instr may be operating on a sub-register of SrcReg as well. + // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit + // register. + // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of + // SrcReg:SubIdx should be replaced. + bool UseSrcSubIdx = + TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; + + // The source has other uses. See if we can replace the other uses with use of + // the result of the extension. + SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + ReachedBBs.insert(UI.getParent()); + + // Uses that are in the same BB of uses of the result of the instruction. + SmallVector<MachineOperand*, 8> Uses; + + // Uses that the result of the instruction can reach. + SmallVector<MachineOperand*, 8> ExtendedUses; + + bool ExtendLife = true; + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); + if (UseMI == MI) + continue; + + if (UseMI->isPHI()) { + ExtendLife = false; + continue; + } + + // Only accept uses of SrcReg:SubIdx. + if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) + continue; + + // It's an error to translate this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 + // + // into this: + // + // %reg1025 = <sext> %reg1024 + // ... + // %reg1027 = COPY %reg1025:4 + // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 + // + // The problem here is that SUBREG_TO_REG is there to assert that an + // implicit zext occurs. It doesn't insert a zext instruction. If we allow + // the COPY here, it will give us the value after the <sext>, not the + // original value of %reg1024 before <sext>. + if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) + continue; + + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (UseMBB == MBB) { + // Local uses that come after the extension. 
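      // (Editorial note, not in the original source: LocalMIs holds the
      // instructions already visited by the forward scan in
      // runOnMachineFunction, so a use that is absent from the set is known
      // to come after MI in this block and can be redirected to the
      // extension result.)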
+ if (!LocalMIs.count(UseMI)) + Uses.push_back(&UseMO); + } else if (ReachedBBs.count(UseMBB)) { + // Non-local uses where the result of the extension is used. Always + // replace these unless it's a PHI. + Uses.push_back(&UseMO); + } else if (Aggressive && DT->dominates(MBB, UseMBB)) { + // We may want to extend the live range of the extension result in order + // to replace these uses. + ExtendedUses.push_back(&UseMO); + } else { + // Both will be live out of the def MBB anyway. Don't extend live range of + // the extension result. + ExtendLife = false; + break; + } + } + + if (ExtendLife && !ExtendedUses.empty()) + // Extend the liveness of the extension result. + Uses.append(ExtendedUses.begin(), ExtendedUses.end()); + + // Now replace all uses. + bool Changed = false; + if (!Uses.empty()) { + SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; + + // Look for PHI uses of the extended result, we don't want to extend the + // liveness of a PHI input. It breaks all kinds of assumptions down + // stream. A PHI use is expected to be the kill of its source values. + for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) + if (UI.isPHI()) + PHIBBs.insert(UI.getParent()); + + const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); + for (unsigned i = 0, e = Uses.size(); i != e; ++i) { + MachineOperand *UseMO = Uses[i]; + MachineInstr *UseMI = UseMO->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (PHIBBs.count(UseMBB)) + continue; + + // About to add uses of DstReg, clear DstReg's kill flags. + if (!Changed) { + MRI->clearKillFlags(DstReg); + MRI->constrainRegClass(DstReg, DstRC); + } + + unsigned NewVR = MRI->createVirtualRegister(RC); + MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) + .addReg(DstReg, 0, SubIdx); + // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. + if (UseSrcSubIdx) { + Copy->getOperand(0).setSubReg(SubIdx); + Copy->getOperand(0).setIsUndef(); + } + UseMO->setReg(NewVR); + ++NumReuse; + Changed = true; + } + } + + return Changed; +} + +/// If the instruction is a compare and the previous instruction it's comparing +/// against already sets (or could be modified to set) the same flag as the +/// compare, then we can remove the comparison and use the flag from the +/// previous instruction. +bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + // If this instruction is a comparison against zero and isn't comparing a + // physical register, we can try to optimize it. + unsigned SrcReg, SrcReg2; + int CmpMask, CmpValue; + if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) + return false; + + // Attempt to optimize the comparison instruction. + if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { + ++NumCmps; + return true; + } + + return false; +} + +/// Optimize a select instruction. 
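The compare elimination in optimizeCmpInstr above is driven entirely by two TargetInstrInfo hooks. As an editorial sketch (MyTargetInstrInfo is an invented name; the signatures below follow the calls made in optimizeCmpInstr, so a real target should match the exact virtuals in TargetInstrInfo.h), a target opts in by overriding:

class MyTargetInstrInfo : public TargetInstrInfo {
public:
  // Recognize a compare and report what it compares against.
  bool analyzeCompare(const MachineInstr *MI, unsigned &SrcReg,
                      unsigned &SrcReg2, int &CmpMask,
                      int &CmpValue) const override;
  // Fold the compare into an earlier flag-setting instruction, or convert
  // that instruction so it sets the flags, then erase the compare.
  bool optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg,
                            unsigned SrcReg2, int CmpMask, int CmpValue,
                            const MachineRegisterInfo *MRI) const override;
};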
+bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, + SmallPtrSetImpl<MachineInstr *> &LocalMIs) { + unsigned TrueOp = 0; + unsigned FalseOp = 0; + bool Optimizable = false; + SmallVector<MachineOperand, 4> Cond; + if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) + return false; + if (!Optimizable) + return false; + if (!TII->optimizeSelect(MI, LocalMIs)) + return false; + MI->eraseFromParent(); + ++NumSelects; + return true; +} + +/// \brief Check if a simpler conditional branch can be +// generated +bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { + return TII->optimizeCondBranch(MI); +} + +/// \brief Try to find the next source that share the same register file +/// for the value defined by \p Reg and \p SubReg. +/// When true is returned, the \p RewriteMap can be used by the client to +/// retrieve all Def -> Use along the way up to the next source. Any found +/// Use that is not itself a key for another entry, is the next source to +/// use. During the search for the next source, multiple sources can be found +/// given multiple incoming sources of a PHI instruction. In this case, we +/// look in each PHI source for the next source; all found next sources must +/// share the same register file as \p Reg and \p SubReg. The client should +/// then be capable to rewrite all intermediate PHIs to get the next source. +/// \return False if no alternative sources are available. True otherwise. +bool PeepholeOptimizer::findNextSource(unsigned Reg, unsigned SubReg, + RewriteMapTy &RewriteMap) { + // Do not try to find a new source for a physical register. + // So far we do not have any motivating example for doing that. + // Thus, instead of maintaining untested code, we will revisit that if + // that changes at some point. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return false; + const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); + + SmallVector<TargetInstrInfo::RegSubRegPair, 4> SrcToLook; + TargetInstrInfo::RegSubRegPair CurSrcPair(Reg, SubReg); + SrcToLook.push_back(CurSrcPair); + + unsigned PHICount = 0; + while (!SrcToLook.empty() && PHICount < RewritePHILimit) { + TargetInstrInfo::RegSubRegPair Pair = SrcToLook.pop_back_val(); + // As explained above, do not handle physical registers + if (TargetRegisterInfo::isPhysicalRegister(Pair.Reg)) + return false; + + CurSrcPair = Pair; + ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, + !DisableAdvCopyOpt, TII); + ValueTrackerResult Res; + bool ShouldRewrite = false; + + do { + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source. + Res = ValTracker.getNextSource(); + if (!Res.isValid()) + break; + + // Insert the Def -> Use entry for the recently found source. + ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair); + if (CurSrcRes.isValid()) { + assert(CurSrcRes == Res && "ValueTrackerResult found must match"); + // An existent entry with multiple sources is a PHI cycle we must avoid. + // Otherwise it's an entry with a valid next source we already found. + if (CurSrcRes.getNumSources() > 1) { + DEBUG(dbgs() << "findNextSource: found PHI cycle, aborting...\n"); + return false; + } + break; + } + RewriteMap.insert(std::make_pair(CurSrcPair, Res)); + + // ValueTrackerResult usually have one source unless it's the result from + // a PHI instruction. Add the found PHI edges to be looked up further. 
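      // (Editorial illustration, not in the original source: for
      //    %v3 = PHI %v1, <BB#0>, %v2, <BB#1>
      // the tracker reports two sources, so both %v1 and %v2 are pushed onto
      // SrcToLook and each chain is then followed independently.)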
+ unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs > 1) { + PHICount++; + for (unsigned i = 0; i < NumSrcs; ++i) + SrcToLook.push_back(TargetInstrInfo::RegSubRegPair( + Res.getSrcReg(i), Res.getSrcSubReg(i))); + break; + } + + CurSrcPair.Reg = Res.getSrcReg(0); + CurSrcPair.SubReg = Res.getSrcSubReg(0); + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. Moreover, if we want to extend + // the live-range of a physical register, unlike SSA virtual register, + // we will have to check that they aren't redefine before the related use. + if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg)) + return false; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg); + ShouldRewrite = TRI->shouldRewriteCopySrc(DefRC, SubReg, SrcRC, + CurSrcPair.SubReg); + } while (!ShouldRewrite); + + // Continue looking for new sources... + if (Res.isValid()) + continue; + + // Do not continue searching for a new source if the there's at least + // one use-def which cannot be rewritten. + if (!ShouldRewrite) + return false; + } + + if (PHICount >= RewritePHILimit) { + DEBUG(dbgs() << "findNextSource: PHI limit reached\n"); + return false; + } + + // If we did not find a more suitable source, there is nothing to optimize. + return CurSrcPair.Reg != Reg; +} + +/// \brief Insert a PHI instruction with incoming edges \p SrcRegs that are +/// guaranteed to have the same register class. This is necessary whenever we +/// successfully traverse a PHI instruction and find suitable sources coming +/// from its edges. By inserting a new PHI, we provide a rewritten PHI def +/// suitable to be used in a new COPY instruction. +static MachineInstr * +insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + const SmallVectorImpl<TargetInstrInfo::RegSubRegPair> &SrcRegs, + MachineInstr *OrigPHI) { + assert(!SrcRegs.empty() && "No sources to create a PHI instruction?"); + + const TargetRegisterClass *NewRC = MRI->getRegClass(SrcRegs[0].Reg); + unsigned NewVR = MRI->createVirtualRegister(NewRC); + MachineBasicBlock *MBB = OrigPHI->getParent(); + MachineInstrBuilder MIB = BuildMI(*MBB, OrigPHI, OrigPHI->getDebugLoc(), + TII->get(TargetOpcode::PHI), NewVR); + + unsigned MBBOpIdx = 2; + for (auto RegPair : SrcRegs) { + MIB.addReg(RegPair.Reg, 0, RegPair.SubReg); + MIB.addMBB(OrigPHI->getOperand(MBBOpIdx).getMBB()); + // Since we're extended the lifetime of RegPair.Reg, clear the + // kill flags to account for that and make RegPair.Reg reaches + // the new PHI. + MRI->clearKillFlags(RegPair.Reg); + MBBOpIdx += 2; + } + + return MIB; +} + +namespace { +/// \brief Helper class to rewrite the arguments of a copy-like instruction. +class CopyRewriter { +protected: + /// The copy-like instruction. + MachineInstr &CopyLike; + /// The index of the source being rewritten. + unsigned CurrentSrcIdx; + +public: + CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} + + virtual ~CopyRewriter() {} + + /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and + /// the related value that it affects (TrackReg, TrackSubReg). + /// A source is considered rewritable if its register class and the + /// register class of the related TrackReg may not be register + /// coalescer friendly. In other words, given a copy-like instruction + /// not all the arguments may be returned at rewritable source, since + /// some arguments are none to be register coalescer friendly. 
+ /// + /// Each call of this method moves the current source to the next + /// rewritable source. + /// For instance, let CopyLike be the instruction to rewrite. + /// CopyLike has one definition and one source: + /// dst.dstSubIdx = CopyLike src.srcSubIdx. + /// + /// The first call will give the first rewritable source, i.e., + /// the only source this instruction has: + /// (SrcReg, SrcSubReg) = (src, srcSubIdx). + /// This source defines the whole definition, i.e., + /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). + /// + /// The second and subsequent calls will return false, as there is only one + /// rewritable source. + /// + /// \return True if a rewritable source has been found, false otherwise. + /// The output arguments are valid if and only if true is returned. + virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) { + // If CurrentSrcIdx == 1, this means this function has already been called + // once. CopyLike has one definition and one argument, thus, there is + // nothing else to rewrite. + if (!CopyLike.isCopy() || CurrentSrcIdx == 1) + return false; + // This is the first call to getNextRewritableSource. + // Move the CurrentSrcIdx to remember that we made that call. + CurrentSrcIdx = 1; + // The rewritable source is the argument. + const MachineOperand &MOSrc = CopyLike.getOperand(1); + SrcReg = MOSrc.getReg(); + SrcSubReg = MOSrc.getSubReg(); + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + /// \brief Rewrite the current source with \p NewReg and \p NewSubReg + /// if possible. + /// \return True if the rewriting was possible, false otherwise. + virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { + if (!CopyLike.isCopy() || CurrentSrcIdx != 1) + return false; + MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); + MOSrc.setReg(NewReg); + MOSrc.setSubReg(NewSubReg); + return true; + } + + /// \brief Given a \p Def.Reg and Def.SubReg pair, use \p RewriteMap to find + /// the new source to use for rewrite. If \p HandleMultipleSources is true and + /// multiple sources for a given \p Def are found along the way, we found a + /// PHI instructions that needs to be rewritten. + /// TODO: HandleMultipleSources should be removed once we test PHI handling + /// with coalescable copies. + TargetInstrInfo::RegSubRegPair + getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII, + TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap, + bool HandleMultipleSources = true) { + + TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg); + do { + ValueTrackerResult Res = RewriteMap.lookup(LookupSrc); + // If there are no entries on the map, LookupSrc is the new source. + if (!Res.isValid()) + return LookupSrc; + + // There's only one source for this definition, keep searching... + unsigned NumSrcs = Res.getNumSources(); + if (NumSrcs == 1) { + LookupSrc.Reg = Res.getSrcReg(0); + LookupSrc.SubReg = Res.getSrcSubReg(0); + continue; + } + + // TODO: Remove once multiple srcs w/ coalescable copies are supported. + if (!HandleMultipleSources) + break; + + // Multiple sources, recurse into each source to find a new source + // for it. Then, rewrite the PHI accordingly to its new edges. 
+ SmallVector<TargetInstrInfo::RegSubRegPair, 4> NewPHISrcs; + for (unsigned i = 0; i < NumSrcs; ++i) { + TargetInstrInfo::RegSubRegPair PHISrc(Res.getSrcReg(i), + Res.getSrcSubReg(i)); + NewPHISrcs.push_back( + getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources)); + } + + // Build the new PHI node and return its def register as the new source. + MachineInstr *OrigPHI = const_cast<MachineInstr *>(Res.getInst()); + MachineInstr *NewPHI = insertPHI(MRI, TII, NewPHISrcs, OrigPHI); + DEBUG(dbgs() << "-- getNewSource\n"); + DEBUG(dbgs() << " Replacing: " << *OrigPHI); + DEBUG(dbgs() << " With: " << *NewPHI); + const MachineOperand &MODef = NewPHI->getOperand(0); + return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg()); + + } while (1); + + return TargetInstrInfo::RegSubRegPair(0, 0); + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + virtual MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) { + return nullptr; + } +}; + +/// \brief Helper class to rewrite uncoalescable copy like instructions +/// into new COPY (coalescable friendly) instructions. +class UncoalescableRewriter : public CopyRewriter { +protected: + const TargetInstrInfo &TII; + MachineRegisterInfo &MRI; + /// The number of defs in the bitcast + unsigned NumDefs; + +public: + UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) + : CopyRewriter(MI), TII(TII), MRI(MRI) { + NumDefs = MI.getDesc().getNumDefs(); + } + + /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) + /// All such sources need to be considered rewritable in order to + /// rewrite a uncoalescable copy-like instruction. This method return + /// each definition that must be checked if rewritable. + /// + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // Find the next non-dead definition and continue from there. + if (CurrentSrcIdx == NumDefs) + return false; + + while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { + ++CurrentSrcIdx; + if (CurrentSrcIdx == NumDefs) + return false; + } + + // What we track are the alternative sources of the definition. + const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + + CurrentSrcIdx++; + return true; + } + + /// \brief Rewrite the source found through \p Def, by using the \p RewriteMap + /// and create a new COPY instruction. More info about RewriteMap in + /// PeepholeOptimizer::findNextSource. Right now this is only used to handle + /// Uncoalescable copies, since they are copy like instructions that aren't + /// recognized by the register allocator. + MachineInstr * + RewriteSource(TargetInstrInfo::RegSubRegPair Def, + PeepholeOptimizer::RewriteMapTy &RewriteMap) override { + assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && + "We do not rewrite physical registers"); + + // Find the new source to use in the COPY rewrite. + TargetInstrInfo::RegSubRegPair NewSrc = + getNewSource(&MRI, &TII, Def, RewriteMap); + + // Insert the COPY. 
+ const TargetRegisterClass *DefRC = MRI.getRegClass(Def.Reg); + unsigned NewVR = MRI.createVirtualRegister(DefRC); + + MachineInstr *NewCopy = + BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), + TII.get(TargetOpcode::COPY), NewVR) + .addReg(NewSrc.Reg, 0, NewSrc.SubReg); + + NewCopy->getOperand(0).setSubReg(Def.SubReg); + if (Def.SubReg) + NewCopy->getOperand(0).setIsUndef(); + + DEBUG(dbgs() << "-- RewriteSource\n"); + DEBUG(dbgs() << " Replacing: " << CopyLike); + DEBUG(dbgs() << " With: " << *NewCopy); + MRI.replaceRegWith(Def.Reg, NewVR); + MRI.clearKillFlags(NewVR); + + // We extended the lifetime of NewSrc.Reg, clear the kill flags to + // account for that. + MRI.clearKillFlags(NewSrc.Reg); + + return NewCopy; + } +}; + +/// \brief Specialized rewriter for INSERT_SUBREG instruction. +class InsertSubregRewriter : public CopyRewriter { +public: + InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isInsertSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. + /// Src1 has the same register class has dst, hence, there is + /// nothing to rewrite. + /// Src2.src2SubIdx, may not be register coalescer friendly. + /// Therefore, the first call to this method returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx). + /// + /// Subsequence calls will return false. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 2) + return false; + // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. + CurrentSrcIdx = 2; + const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); + SrcReg = MOInsertedReg.getReg(); + SrcSubReg = MOInsertedReg.getSubReg(); + const MachineOperand &MODef = CopyLike.getOperand(0); + + // We want to track something that is compatible with the + // partial definition. + TrackReg = MODef.getReg(); + if (MODef.getSubReg()) + // Bail if we have to compose sub-register indices. + return false; + TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); + return true; + } + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + if (CurrentSrcIdx != 2) + return false; + // We are rewriting the inserted reg. + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for EXTRACT_SUBREG instruction. +class ExtractSubregRewriter : public CopyRewriter { + const TargetInstrInfo &TII; + +public: + ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) + : CopyRewriter(MI), TII(TII) { + assert(MI.isExtractSubreg() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. + /// There is only one rewritable source: Src.subIdx, + /// which defines dst.dstSubIdx. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // If we already get the only source we can rewrite, return false. + if (CurrentSrcIdx == 1) + return false; + // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 
+ CurrentSrcIdx = 1; + const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); + SrcReg = MOExtractedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if (MOExtractedReg.getSubReg()) + return false; + + SrcSubReg = CopyLike.getOperand(2).getImm(); + + // We want to track something that is compatible with the definition. + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + TrackSubReg = MODef.getSubReg(); + return true; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // The only source we can rewrite is the input register. + if (CurrentSrcIdx != 1) + return false; + + CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); + + // If we find a source that does not require to extract something, + // rewrite the operation with a copy. + if (!NewSubReg) { + // Move the current index to an invalid position. + // We do not want another call to this method to be able + // to do any change. + CurrentSrcIdx = -1; + // Rewrite the operation as a COPY. + // Get rid of the sub-register index. + CopyLike.RemoveOperand(2); + // Morph the operation into a COPY. + CopyLike.setDesc(TII.get(TargetOpcode::COPY)); + return true; + } + CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); + return true; + } +}; + +/// \brief Specialized rewriter for REG_SEQUENCE instruction. +class RegSequenceRewriter : public CopyRewriter { +public: + RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { + assert(MI.isRegSequence() && "Invalid instruction"); + } + + /// \brief See CopyRewriter::getNextRewritableSource. + /// Here CopyLike has the following form: + /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. + /// Each call will return a different source, walking all the available + /// source. + /// + /// The first call returns: + /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx1). + /// + /// The second call returns: + /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). + /// (TrackReg, TrackSubReg) = (dst, subIdx2). + /// + /// And so on, until all the sources have been traversed, then + /// it returns false. + bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, + unsigned &TrackReg, + unsigned &TrackSubReg) override { + // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. + + // If this is the first call, move to the first argument. + if (CurrentSrcIdx == 0) { + CurrentSrcIdx = 1; + } else { + // Otherwise, move to the next argument and check that it is valid. + CurrentSrcIdx += 2; + if (CurrentSrcIdx >= CopyLike.getNumOperands()) + return false; + } + const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); + SrcReg = MOInsertedReg.getReg(); + // If we have to compose sub-register indices, bail out. + if ((SrcSubReg = MOInsertedReg.getSubReg())) + return false; + + // We want to track something that is compatible with the related + // partial definition. + TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); + + const MachineOperand &MODef = CopyLike.getOperand(0); + TrackReg = MODef.getReg(); + // If we have to compose sub-registers, bail. + return MODef.getSubReg() == 0; + } + + bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { + // We cannot rewrite out of bound operands. + // Moreover, rewritable sources are at odd positions. 
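    // (Editorial illustration, not in the original source; sub-register names
    // are purely illustrative: in
    //    dst = REG_SEQUENCE %v1, sub_lo, %v2, sub_hi
    // operand 0 is dst, the register sources sit at the odd indices 1 and 3,
    // and their sub-register indices follow at the even positions 2 and 4.)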
+ if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) + return false; + + MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); + MO.setReg(NewReg); + MO.setSubReg(NewSubReg); + return true; + } +}; +} // End namespace. + +/// \brief Get the appropriated CopyRewriter for \p MI. +/// \return A pointer to a dynamically allocated CopyRewriter or nullptr +/// if no rewriter works for \p MI. +static CopyRewriter *getCopyRewriter(MachineInstr &MI, + const TargetInstrInfo &TII, + MachineRegisterInfo &MRI) { + // Handle uncoalescable copy-like instructions. + if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || + MI.isExtractSubregLike())) + return new UncoalescableRewriter(MI, TII, MRI); + + switch (MI.getOpcode()) { + default: + return nullptr; + case TargetOpcode::COPY: + return new CopyRewriter(MI); + case TargetOpcode::INSERT_SUBREG: + return new InsertSubregRewriter(MI); + case TargetOpcode::EXTRACT_SUBREG: + return new ExtractSubregRewriter(MI, TII); + case TargetOpcode::REG_SEQUENCE: + return new RegSequenceRewriter(MI); + } + llvm_unreachable(nullptr); +} + +/// \brief Optimize generic copy instructions to avoid cross +/// register bank copy. The optimization looks through a chain of +/// copies and tries to find a source that has a compatible register +/// class. +/// Two register classes are considered to be compatible if they share +/// the same register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstrain the register allocator, but replaces some operands +/// when possible. +/// \pre isCoalescableCopy(*MI) is true. +/// \return True, when \p MI has been rewritten. False otherwise. +bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { + assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); + assert(MI->getDesc().getNumDefs() == 1 && + "Coalescer can understand multiple defs?!"); + const MachineOperand &MODef = MI->getOperand(0); + // Do not rewrite physical definitions. + if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) + return false; + + bool Changed = false; + // Get the right rewriter for the current copy. + std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + // Rewrite each rewritable source. + unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; + while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, + TrackSubReg)) { + // Keep track of PHI nodes and its incoming edges when looking for sources. + RewriteMapTy RewriteMap; + // Try to find a more suitable source. If we failed to do so, or get the + // actual source, move to the next source. + if (!findNextSource(TrackReg, TrackSubReg, RewriteMap)) + continue; + + // Get the new source to rewrite. TODO: Only enable handling of multiple + // sources (PHIs) once we have a motivating example and testcases for it. + TargetInstrInfo::RegSubRegPair TrackPair(TrackReg, TrackSubReg); + TargetInstrInfo::RegSubRegPair NewSrc = CpyRewriter->getNewSource( + MRI, TII, TrackPair, RewriteMap, false /* multiple sources */); + if (SrcReg == NewSrc.Reg || NewSrc.Reg == 0) + continue; + + // Rewrite source. + if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) { + // We may have extended the live-range of NewSrc, account for that. 
+ MRI->clearKillFlags(NewSrc.Reg); + Changed = true; + } + } + // TODO: We could have a clean-up method to tidy the instruction. + // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 + // => v0 = COPY v1 + // Currently we haven't seen motivating example for that and we + // want to avoid untested code. + NumRewrittenCopies += Changed; + return Changed; +} + +/// \brief Optimize copy-like instructions to create +/// register coalescer friendly instruction. +/// The optimization tries to kill-off the \p MI by looking +/// through a chain of copies to find a source that has a compatible +/// register class. +/// If such a source is found, it replace \p MI by a generic COPY +/// operation. +/// \pre isUncoalescableCopy(*MI) is true. +/// \return True, when \p MI has been optimized. In that case, \p MI has +/// been removed from its parent. +/// All COPY instructions created, are inserted in \p LocalMIs. +bool PeepholeOptimizer::optimizeUncoalescableCopy( + MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { + assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); + + // Check if we can rewrite all the values defined by this instruction. + SmallVector<TargetInstrInfo::RegSubRegPair, 4> RewritePairs; + // Get the right rewriter for the current copy. + std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); + // If none exists, bail out. + if (!CpyRewriter) + return false; + + // Rewrite each rewritable source by generating new COPYs. This works + // differently from optimizeCoalescableCopy since it first makes sure that all + // definitions can be rewritten. + RewriteMapTy RewriteMap; + unsigned Reg, SubReg, CopyDefReg, CopyDefSubReg; + while (CpyRewriter->getNextRewritableSource(Reg, SubReg, CopyDefReg, + CopyDefSubReg)) { + // If a physical register is here, this is probably for a good reason. + // Do not rewrite that. + if (TargetRegisterInfo::isPhysicalRegister(CopyDefReg)) + return false; + + // If we do not know how to rewrite this definition, there is no point + // in trying to kill this instruction. + TargetInstrInfo::RegSubRegPair Def(CopyDefReg, CopyDefSubReg); + if (!findNextSource(Def.Reg, Def.SubReg, RewriteMap)) + return false; + + RewritePairs.push_back(Def); + } + + // The change is possible for all defs, do it. + for (const auto &Def : RewritePairs) { + // Rewrite the "copy" in a way the register coalescer understands. + MachineInstr *NewCopy = CpyRewriter->RewriteSource(Def, RewriteMap); + assert(NewCopy && "Should be able to always generate a new copy"); + LocalMIs.insert(NewCopy); + } + + // MI is now dead. + MI->eraseFromParent(); + ++NumUncoalescableCopies; + return true; +} + +/// Check whether MI is a candidate for folding into a later instruction. +/// We only fold loads to virtual registers and the virtual register defined +/// has a single use. +bool PeepholeOptimizer::isLoadFoldable( + MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { + if (!MI->canFoldAsLoad() || !MI->mayLoad()) + return false; + const MCInstrDesc &MCID = MI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + + unsigned Reg = MI->getOperand(0).getReg(); + // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting + // loads. It should be checked when processing uses of the load, since + // uses can be removed during peephole. 
+ if (!MI->getOperand(0).getSubReg() && + TargetRegisterInfo::isVirtualRegister(Reg) && + MRI->hasOneNonDBGUse(Reg)) { + FoldAsLoadDefCandidates.insert(Reg); + return true; + } + return false; +} + +bool PeepholeOptimizer::isMoveImmediate( + MachineInstr *MI, SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { + const MCInstrDesc &MCID = MI->getDesc(); + if (!MI->isMoveImmediate()) + return false; + if (MCID.getNumDefs() != 1) + return false; + unsigned Reg = MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + ImmDefMIs.insert(std::make_pair(Reg, MI)); + ImmDefRegs.insert(Reg); + return true; + } + + return false; +} + +/// Try folding register operands that are defined by move immediate +/// instructions, i.e. a trivial constant folding optimization, if +/// and only if the def and use are in the same BB. +bool PeepholeOptimizer::foldImmediate( + MachineInstr *MI, MachineBasicBlock *MBB, SmallSet<unsigned, 4> &ImmDefRegs, + DenseMap<unsigned, MachineInstr *> &ImmDefMIs) { + for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (ImmDefRegs.count(Reg) == 0) + continue; + DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); + if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { + ++NumImmFold; + return true; + } + } + return false; +} + +// FIXME: This is very simple and misses some cases which should be handled when +// motivating examples are found. +// +// The copy rewriting logic should look at uses as well as defs and be able to +// eliminate copies across blocks. +// +// Later copies that are subregister extracts will also not be eliminated since +// only the first copy is considered. +// +// e.g. +// %vreg1 = COPY %vreg0 +// %vreg2 = COPY %vreg0:sub1 +// +// Should replace %vreg2 uses with %vreg1:sub1 +bool PeepholeOptimizer::foldRedundantCopy( + MachineInstr *MI, SmallSet<unsigned, 4> &CopySrcRegs, + DenseMap<unsigned, MachineInstr *> &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + return false; + + if (CopySrcRegs.insert(SrcReg).second) { + // First copy of this reg seen. + CopyMIs.insert(std::make_pair(SrcReg, MI)); + return false; + } + + MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second; + + unsigned SrcSubReg = MI->getOperand(1).getSubReg(); + unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg(); + + // Can't replace different subregister extracts. + if (SrcSubReg != PrevSrcSubReg) + return false; + + unsigned PrevDstReg = PrevCopy->getOperand(0).getReg(); + + // Only replace if the copy register class is the same. + // + // TODO: If we have multiple copies to different register classes, we may want + // to track multiple copies of the same source register. + if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg)) + return false; + + MRI->replaceRegWith(DstReg, PrevDstReg); + + // Lifetime of the previous copy has been extended. 
+ MRI->clearKillFlags(PrevDstReg); + return true; +} + +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. + DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + +bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); + + if (DisablePeephole) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; + + bool Changed = false; + + for (MachineBasicBlock &MBB : MF) { + bool SeenMoveImm = false; + + // During this forward scan, at some point it needs to answer the question + // "given a pointer to an MI in the current BB, is it located before or + // after the current instruction". + // To perform this, the following set keeps track of the MIs already seen + // during the scan, if a MI is not in the set, it is assumed to be located + // after. Newly created MIs have to be inserted in the set as well. + SmallPtrSet<MachineInstr*, 16> LocalMIs; + SmallSet<unsigned, 4> ImmDefRegs; + DenseMap<unsigned, MachineInstr*> ImmDefMIs; + SmallSet<unsigned, 16> FoldAsLoadDefCandidates; + + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. + // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. 
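    // (Editorial illustration, not in the original source; register names are
    // illustrative. With a reserved, non-allocatable register such as the
    // stack pointer:
    //    %vreg5 = COPY %SP     ; recorded in the map as SP -> this COPY
    //    ...                   ; nothing redefines %SP in between
    //    %SP = COPY %vreg5     ; foldRedundantNAPhysCopy() flags this copy
    // the second copy is erased because %SP still holds the same value.)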
+ DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs; + + // Set of virtual registers that are copied from. + SmallSet<unsigned, 4> CopySrcRegs; + DenseMap<unsigned, MachineInstr *> CopySrcMIs; + + for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end(); + MII != MIE; ) { + MachineInstr *MI = &*MII; + // We may be erasing MI below, increment MII now. + ++MII; + LocalMIs.insert(MI); + + // Skip debug values. They should not affect this peephole optimization. + if (MI->isDebugValue()) + continue; + + // If we run into an instruction we can't fold across, discard + // the load candidates. + if (MI->isLoadFoldBarrier()) + FoldAsLoadDefCandidates.clear(); + + if (MI->isPosition() || MI->isPHI()) + continue; + + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. + DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); + continue; + } + + if ((isUncoalescableCopy(*MI) && + optimizeUncoalescableCopy(MI, LocalMIs)) || + (MI->isCompare() && optimizeCmpInstr(MI, &MBB)) || + (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { + // MI is deleted. + LocalMIs.erase(MI); + Changed = true; + continue; + } + + if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { + Changed = true; + continue; + } + + if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { + // MI is just rewritten. + Changed = true; + continue; + } + + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { + LocalMIs.erase(MI); + MI->eraseFromParent(); + Changed = true; + continue; + } + + if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { + SeenMoveImm = true; + } else { + Changed |= optimizeExtInstr(MI, &MBB, LocalMIs); + // optimizeExtInstr might have created new instructions after MI + // and before the already incremented MII. Adjust MII so that the + // next iteration sees the new instructions. + MII = MI; + ++MII; + if (SeenMoveImm) + Changed |= foldImmediate(MI, &MBB, ImmDefRegs, ImmDefMIs); + } + + // Check whether MI is a load candidate for folding into a later + // instruction. If MI is not a candidate, check whether we can fold an + // earlier load into MI. 
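+ // For example (x86-style opcodes, purely illustrative):
+ //   %vreg1 = MOV32rm <fi#0>          ; earlier single-use load candidate
+ //   %vreg2 = ADD32rr %vreg0, %vreg1  ; current MI uses the loaded value
+ // may be turned by TII->optimizeLoadInstr() into
+ //   %vreg2 = ADD32rm %vreg0, <fi#0>
+ // after which both original instructions are erased.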
+ if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && + !FoldAsLoadDefCandidates.empty()) { + const MCInstrDesc &MIDesc = MI->getDesc(); + for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); + ++i) { + const MachineOperand &MOp = MI->getOperand(i); + if (!MOp.isReg()) + continue; + unsigned FoldAsLoadDefReg = MOp.getReg(); + if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { + // We need to fold load after optimizeCmpInstr, since + // optimizeCmpInstr can enable folding by converting SUB to CMP. + // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and + // we need it for markUsesInDebugValueAsUndef(). + unsigned FoldedReg = FoldAsLoadDefReg; + MachineInstr *DefMI = nullptr; + MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, + FoldAsLoadDefReg, + DefMI); + if (FoldMI) { + // Update LocalMIs since we replaced MI with FoldMI and deleted + // DefMI. + DEBUG(dbgs() << "Replacing: " << *MI); + DEBUG(dbgs() << " With: " << *FoldMI); + LocalMIs.erase(MI); + LocalMIs.erase(DefMI); + LocalMIs.insert(FoldMI); + MI->eraseFromParent(); + DefMI->eraseFromParent(); + MRI->markUsesInDebugValueAsUndef(FoldedReg); + FoldAsLoadDefCandidates.erase(FoldedReg); + ++NumLoadFold; + // MI is replaced with FoldMI. + Changed = true; + break; + } + } + } + } + } + } + + return Changed; +} + +ValueTrackerResult ValueTracker::getNextSourceFromCopy() { + assert(Def->isCopy() && "Invalid definition"); + // Copy instruction are supposed to be: Def = Src. + // If someone breaks this assumption, bad things will happen everywhere. + assert(Def->getNumOperands() == 2 && "Invalid number of operands"); + + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of src. + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); + // Otherwise, we want the whole source. + const MachineOperand &Src = Def->getOperand(1); + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); +} + +ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { + assert(Def->isBitcast() && "Invalid definition"); + + // Bail if there are effects that a plain copy will not expose. + if (Def->hasUnmodeledSideEffects()) + return ValueTrackerResult(); + + // Bitcasts with more than one def are not supported. + if (Def->getDesc().getNumDefs() != 1) + return ValueTrackerResult(); + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of the src. + // Bails as we do not support composing subregs yet. + return ValueTrackerResult(); + + unsigned SrcIdx = Def->getNumOperands(); + for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; + ++OpIdx) { + const MachineOperand &MO = Def->getOperand(OpIdx); + if (!MO.isReg() || !MO.getReg()) + continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; + assert(!MO.isDef() && "We should have skipped all the definitions by now"); + if (SrcIdx != EndOpIdx) + // Multiple sources? + return ValueTrackerResult(); + SrcIdx = OpIdx; + } + const MachineOperand &Src = Def->getOperand(SrcIdx); + return ValueTrackerResult(Src.getReg(), Src.getSubReg()); +} + +ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { + assert((Def->isRegSequence() || Def->isRegSequenceLike()) && + "Invalid definition"); + + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subregs, bail out. + // The case we are checking is Def.<subreg> = REG_SEQUENCE. 
+ // This should almost never happen as the SSA property is tracked at + // the register level (as opposed to the subreg level). + // I.e., + // Def.sub0 = + // Def.sub1 = + // is a valid SSA representation for Def.sub0 and Def.sub1, but not for + // Def. Thus, it must not be generated. + // However, some code could theoretically generates a single + // Def.sub0 (i.e, not defining the other subregs) and we would + // have this case. + // If we can ascertain (or force) that this never happens, we could + // turn that into an assertion. + return ValueTrackerResult(); + + if (!TII) + // We could handle the REG_SEQUENCE here, but we do not want to + // duplicate the code from the generic TII. + return ValueTrackerResult(); + + SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; + if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) + return ValueTrackerResult(); + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + // Check if one of the operand defines the subreg we are interested in. + for (auto &RegSeqInput : RegSeqInputRegs) { + if (RegSeqInput.SubIdx == DefSubReg) { + if (RegSeqInput.SubReg) + // Bail if we have to compose sub registers. + return ValueTrackerResult(); + + return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); + } + } + + // If the subreg we are tracking is super-defined by another subreg, + // we could follow this value. However, this would require to compose + // the subreg and we do not do that for now. + return ValueTrackerResult(); +} + +ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { + assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && + "Invalid definition"); + + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subreg, bail out. + // Same remark as getNextSourceFromRegSequence. + // I.e., this may be turned into an assert. + return ValueTrackerResult(); + + if (!TII) + // We could handle the REG_SEQUENCE here, but we do not want to + // duplicate the code from the generic TII. + return ValueTrackerResult(); + + TargetInstrInfo::RegSubRegPair BaseReg; + TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; + if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) + return ValueTrackerResult(); + + // We are looking at: + // Def = INSERT_SUBREG v0, v1, sub1 + // There are two cases: + // 1. DefSubReg == sub1, get v1. + // 2. DefSubReg != sub1, the value may be available through v0. + + // #1 Check if the inserted register matches the required sub index. + if (InsertedReg.SubIdx == DefSubReg) { + return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); + } + // #2 Otherwise, if the sub register we are looking for is not partial + // defined by the inserted element, we can look through the main + // register (v0). + const MachineOperand &MODef = Def->getOperand(DefIdx); + // If the result register (Def) and the base register (v0) do not + // have the same register class or if we have to compose + // subregisters, bail out. + if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || + BaseReg.SubReg) + return ValueTrackerResult(); + + // Get the TRI and check if the inserted sub-register overlaps with the + // sub-register we are tracking. + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + if (!TRI || + (TRI->getSubRegIndexLaneMask(DefSubReg) & + TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) + return ValueTrackerResult(); + // At this point, the value is available in v0 via the same subreg + // we used for Def. 
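+ // For example (sub-register indices are illustrative):
+ //   Def = INSERT_SUBREG v0, v1, sub_hi
+ // while we are tracking Def:sub_lo. Because the lane masks of sub_lo
+ // and sub_hi do not overlap, v1 cannot modify the bits of sub_lo, so
+ // the value we want is simply v0:sub_lo.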
+ return ValueTrackerResult(BaseReg.Reg, DefSubReg); +} + +ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { + assert((Def->isExtractSubreg() || + Def->isExtractSubregLike()) && "Invalid definition"); + // We are looking at: + // Def = EXTRACT_SUBREG v0, sub0 + + // Bail if we have to compose sub registers. + // Indeed, if DefSubReg != 0, we would have to compose it with sub0. + if (DefSubReg) + return ValueTrackerResult(); + + if (!TII) + // We could handle the EXTRACT_SUBREG here, but we do not want to + // duplicate the code from the generic TII. + return ValueTrackerResult(); + + TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; + if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) + return ValueTrackerResult(); + + // Bail if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. + if (ExtractSubregInputReg.SubReg) + return ValueTrackerResult(); + // Otherwise, the value is available in the v0.sub0. + return ValueTrackerResult(ExtractSubregInputReg.Reg, + ExtractSubregInputReg.SubIdx); +} + +ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { + assert(Def->isSubregToReg() && "Invalid definition"); + // We are looking at: + // Def = SUBREG_TO_REG Imm, v0, sub0 + + // Bail if we have to compose sub registers. + // If DefSubReg != sub0, we would have to check that all the bits + // we track are included in sub0 and if yes, we would have to + // determine the right subreg in v0. + if (DefSubReg != Def->getOperand(3).getImm()) + return ValueTrackerResult(); + // Bail if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose it with sub0. + if (Def->getOperand(2).getSubReg()) + return ValueTrackerResult(); + + return ValueTrackerResult(Def->getOperand(2).getReg(), + Def->getOperand(3).getImm()); +} + +/// \brief Explore each PHI incoming operand and return its sources +ValueTrackerResult ValueTracker::getNextSourceFromPHI() { + assert(Def->isPHI() && "Invalid definition"); + ValueTrackerResult Res; + + // If we look for a different subreg, bail as we do not support composing + // subregs yet. + if (Def->getOperand(0).getSubReg() != DefSubReg) + return ValueTrackerResult(); + + // Return all register sources for PHI instructions. + for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) { + auto &MO = Def->getOperand(i); + assert(MO.isReg() && "Invalid PHI instruction"); + Res.addSource(MO.getReg(), MO.getSubReg()); + } + + return Res; +} + +ValueTrackerResult ValueTracker::getNextSourceImpl() { + assert(Def && "This method needs a valid definition"); + + assert( + (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && + Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); + if (Def->isCopy()) + return getNextSourceFromCopy(); + if (Def->isBitcast()) + return getNextSourceFromBitcast(); + // All the remaining cases involve "complex" instructions. + // Bail if we did not ask for the advanced tracking. 
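+ // For example (illustrative chain), with advanced tracking the walk
+ //   %v2 = REG_SEQUENCE %v0, sub0, %v1, sub1
+ //   %v3 = COPY %v2:sub0
+ // can be rewound from %v3 all the way back to %v0; without it the
+ // walk stops at the REG_SEQUENCE.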
+ if (!UseAdvancedTracking) + return ValueTrackerResult(); + if (Def->isRegSequence() || Def->isRegSequenceLike()) + return getNextSourceFromRegSequence(); + if (Def->isInsertSubreg() || Def->isInsertSubregLike()) + return getNextSourceFromInsertSubreg(); + if (Def->isExtractSubreg() || Def->isExtractSubregLike()) + return getNextSourceFromExtractSubreg(); + if (Def->isSubregToReg()) + return getNextSourceFromSubregToReg(); + if (Def->isPHI()) + return getNextSourceFromPHI(); + return ValueTrackerResult(); +} + +ValueTrackerResult ValueTracker::getNextSource() { + // If we reach a point where we cannot move up in the use-def chain, + // there is nothing we can get. + if (!Def) + return ValueTrackerResult(); + + ValueTrackerResult Res = getNextSourceImpl(); + if (Res.isValid()) { + // Update definition, definition index, and subregister for the + // next call of getNextSource. + // Update the current register. + bool OneRegSrc = Res.getNumSources() == 1; + if (OneRegSrc) + Reg = Res.getSrcReg(0); + // Update the result before moving up in the use-def chain + // with the instruction containing the last found sources. + Res.setInst(Def); + + // If we can still move up in the use-def chain, move to the next + // definition. + if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { + Def = MRI.getVRegDef(Reg); + DefIdx = MRI.def_begin(Reg).getOperandNo(); + DefSubReg = Res.getSrcSubReg(0); + return Res; + } + } + // If we end up here, this means we will not be able to find another source + // for the next iteration. Make sure any new call to getNextSource bails out + // early by cutting the use-def chain. + Def = nullptr; + return Res; +} diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp new file mode 100644 index 0000000..b95dffd --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -0,0 +1,684 @@ +//===----- SchedulePostRAList.cpp - list scheduler ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "AggressiveAntiDepBreaker.h" +#include "AntiDepBreaker.h" +#include "CriticalAntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "post-RA-sched" + +STATISTIC(NumNoops, "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); +STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies"); + +// Post-RA scheduling is enabled with +// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to +// override the target. +static cl::opt<bool> +EnablePostRAScheduler("post-RA-scheduler", + cl::desc("Enable scheduling after register allocation"), + cl::init(false), cl::Hidden); +static cl::opt<std::string> +EnableAntiDepBreaking("break-anti-dependencies", + cl::desc("Break post-RA scheduling anti-dependencies: " + "\"critical\", \"all\", or \"none\""), + cl::init("none"), cl::Hidden); + +// If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod +static cl::opt<int> +DebugDiv("postra-sched-debugdiv", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); +static cl::opt<int> +DebugMod("postra-sched-debugmod", + cl::desc("Debug control MBBs that are scheduled"), + cl::init(0), cl::Hidden); + +AntiDepBreaker::~AntiDepBreaker() { } + +namespace { + class PostRAScheduler : public MachineFunctionPass { + const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; + + public: + static char ID; + PostRAScheduler() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<TargetPassConfig>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &Fn) override; + + bool enablePostRAScheduler( + const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const; + }; + char PostRAScheduler::ID = 0; + + class SchedulePostRATDList : public ScheduleDAGInstrs { + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + LatencyPriorityQueue AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). 
Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// AntiDepBreak - Anti-dependence breaking object, or NULL if none + AntiDepBreaker *AntiDepBreak; + + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + + /// The index in BB of RegionEnd. + /// + /// This is the instruction number from the top of the current block, not + /// the SlotIndex. It is only used by the AntiDepBreaker. + unsigned EndIndex; + + public: + SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs); + + ~SchedulePostRATDList() override; + + /// startBlock - Initialize register live-range state for scheduling in + /// this block. + /// + void startBlock(MachineBasicBlock *BB) override; + + // Set the index of RegionEnd within the current BB. + void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } + + /// Initialize the scheduler state for the next scheduling region. + void enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) override; + + /// Notify that the scheduler has finished scheduling the current region. + void exitRegion() override; + + /// Schedule - Schedule the instruction range using list scheduling. + /// + void schedule() override; + + void EmitSchedule(); + + /// Observe - Update liveness information to account for the current + /// instruction, which will not be scheduled. + /// + void Observe(MachineInstr *MI, unsigned Count); + + /// finishBlock - Clean up register live-range state. + /// + void finishBlock() override; + + private: + void ReleaseSucc(SUnit *SU, SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); + + void dumpSchedule() const; + void emitNoop(unsigned CurCycle); + }; +} + +char &llvm::PostRASchedulerID = PostRAScheduler::ID; + +INITIALIZE_PASS(PostRAScheduler, "post-RA-sched", + "Post RA top-down list latency scheduler", false, false) + +SchedulePostRATDList::SchedulePostRATDList( + MachineFunction &MF, MachineLoopInfo &MLI, AliasAnalysis *AA, + const RegisterClassInfo &RCI, + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, + SmallVectorImpl<const TargetRegisterClass *> &CriticalPathRCs) + : ScheduleDAGInstrs(MF, &MLI), AA(AA), EndIndex(0) { + + const InstrItineraryData *InstrItins = + MF.getSubtarget().getInstrItineraryData(); + HazardRec = + MF.getSubtarget().getInstrInfo()->CreateTargetPostRAHazardRecognizer( + InstrItins, this); + + assert((AntiDepMode == TargetSubtargetInfo::ANTIDEP_NONE || + MRI.tracksLiveness()) && + "Live-ins must be accurate for anti-dependency breaking"); + AntiDepBreak = + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ? + (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) : + ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ? + (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : nullptr)); +} + +SchedulePostRATDList::~SchedulePostRATDList() { + delete HazardRec; + delete AntiDepBreak; +} + +/// Initialize state associated with the next scheduling region. 
+void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); + Sequence.clear(); +} + +/// Print the schedule before exiting the region. +void SchedulePostRATDList::exitRegion() { + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); + ScheduleDAGInstrs::exitRegion(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// dumpSchedule - dump the scheduled Sequence. +void SchedulePostRATDList::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} +#endif + +bool PostRAScheduler::enablePostRAScheduler( + const TargetSubtargetInfo &ST, + CodeGenOpt::Level OptLevel, + TargetSubtargetInfo::AntiDepBreakMode &Mode, + TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const { + Mode = ST.getAntiDepBreakMode(); + ST.getCriticalPathRCs(CriticalPathRCs); + return ST.enablePostRAScheduler() && + OptLevel >= ST.getOptLevelToEnablePostRAScheduler(); +} + +bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { + if (skipOptnoneFunction(*Fn.getFunction())) + return false; + + TII = Fn.getSubtarget().getInstrInfo(); + MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>(); + AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>(); + + RegClassInfo.runOnMachineFunction(Fn); + + // Check for explicit enable/disable of post-ra scheduling. + TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = + TargetSubtargetInfo::ANTIDEP_NONE; + SmallVector<const TargetRegisterClass*, 4> CriticalPathRCs; + if (EnablePostRAScheduler.getPosition() > 0) { + if (!EnablePostRAScheduler) + return false; + } else { + // Check that post-RA scheduling is enabled for this target. + // This may upgrade the AntiDepMode. + if (!enablePostRAScheduler(Fn.getSubtarget(), PassConfig->getOptLevel(), + AntiDepMode, CriticalPathRCs)) + return false; + } + + // Check for antidep breaking override... + if (EnableAntiDepBreaking.getPosition() > 0) { + AntiDepMode = (EnableAntiDepBreaking == "all") + ? TargetSubtargetInfo::ANTIDEP_ALL + : ((EnableAntiDepBreaking == "critical") + ? TargetSubtargetInfo::ANTIDEP_CRITICAL + : TargetSubtargetInfo::ANTIDEP_NONE); + } + + DEBUG(dbgs() << "PostRAScheduler\n"); + + SchedulePostRATDList Scheduler(Fn, MLI, AA, RegClassInfo, AntiDepMode, + CriticalPathRCs); + + // Loop over all of the basic blocks + for (auto &MBB : Fn) { +#ifndef NDEBUG + // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int bbcnt = 0; + if (bbcnt++ % DebugDiv != DebugMod) + continue; + dbgs() << "*** DEBUG scheduling " << Fn.getName() + << ":BB#" << MBB.getNumber() << " ***\n"; + } +#endif + + // Initialize register live-range state for scheduling in this block. + Scheduler.startBlock(&MBB); + + // Schedule each sequence of instructions not interrupted by a label + // or anything else that effectively needs to shut down scheduling. 
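+ // For example (illustrative block contents):
+ //   I0; I1; CALL; I2; I3
+ // is split into the regions [I2,I3] and [I0,I1]; the CALL itself acts
+ // as a boundary that is Observe()d for liveness but never reordered.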
+ MachineBasicBlock::iterator Current = MBB.end(); + unsigned Count = MBB.size(), CurrentCount = Count; + for (MachineBasicBlock::iterator I = Current; I != MBB.begin();) { + MachineInstr *MI = std::prev(I); + --Count; + // Calls are not scheduling boundaries before register allocation, but + // post-ra we don't gain anything by scheduling across calls since we + // don't need to worry about register pressure. + if (MI->isCall() || TII->isSchedulingBoundary(MI, &MBB, Fn)) { + Scheduler.enterRegion(&MBB, I, Current, CurrentCount - Count); + Scheduler.setEndIndex(CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); + Scheduler.EmitSchedule(); + Current = MI; + CurrentCount = Count; + Scheduler.Observe(MI, CurrentCount); + } + I = MI; + if (MI->isBundle()) + Count -= MI->getBundleSize(); + } + assert(Count == 0 && "Instruction count mismatch!"); + assert((MBB.begin() == Current || CurrentCount != 0) && + "Instruction count mismatch!"); + Scheduler.enterRegion(&MBB, MBB.begin(), Current, CurrentCount); + Scheduler.setEndIndex(CurrentCount); + Scheduler.schedule(); + Scheduler.exitRegion(); + Scheduler.EmitSchedule(); + + // Clean up register live-range state. + Scheduler.finishBlock(); + + // Update register kills + Scheduler.fixupKills(&MBB); + } + + return true; +} + +/// StartBlock - Initialize register live-range state for scheduling in +/// this block. +/// +void SchedulePostRATDList::startBlock(MachineBasicBlock *BB) { + // Call the superclass. + ScheduleDAGInstrs::startBlock(BB); + + // Reset the hazard recognizer and anti-dep breaker. + HazardRec->Reset(); + if (AntiDepBreak) + AntiDepBreak->StartBlock(BB); +} + +/// Schedule - Schedule the instruction range using list scheduling. +/// +void SchedulePostRATDList::schedule() { + // Build the scheduling graph. + buildSchedGraph(AA); + + if (AntiDepBreak) { + unsigned Broken = + AntiDepBreak->BreakAntiDependencies(SUnits, RegionBegin, RegionEnd, + EndIndex, DbgValues); + + if (Broken != 0) { + // We made changes. Update the dependency graph. + // Theoretically we could update the graph in place: + // When a live range is changed to use a different register, remove + // the def's anti-dependence *and* output-dependence edges due to + // that register, and add new anti-dependence and output-dependence + // edges based on the next live range of the register. + ScheduleDAG::clearDAG(); + buildSchedGraph(AA); + + NumFixedAnti += Broken; + } + } + + DEBUG(dbgs() << "********** List Scheduling **********\n"); + DEBUG( + for (const SUnit &SU : SUnits) { + SU.dumpAll(this); + dbgs() << '\n'; + } + ); + + AvailableQueue.initNodes(SUnits); + ListScheduleTopDown(); + AvailableQueue.releaseState(); +} + +/// Observe - Update liveness information to account for the current +/// instruction, which will not be scheduled. +/// +void SchedulePostRATDList::Observe(MachineInstr *MI, unsigned Count) { + if (AntiDepBreak) + AntiDepBreak->Observe(MI, Count, EndIndex); +} + +/// FinishBlock - Clean up register live-range state. +/// +void SchedulePostRATDList::finishBlock() { + if (AntiDepBreak) + AntiDepBreak->FinishBlock(); + + // Call the superclass. + ScheduleDAGInstrs::finishBlock(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. 
+void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { + SUnit *SuccSU = SuccEdge->getSUnit(); + + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + return; + } +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + --SuccSU->NumPredsLeft; + + // Standard scheduler algorithms will recompute the depth of the successor + // here as such: + // SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); + // + // However, we lazily compute node depth instead. Note that + // ScheduleNodeTopDown has already updated the depth of this node which causes + // all descendents to be marked dirty. Setting the successor depth explicitly + // here would cause depth to be recomputed for all its ancestors. If the + // successor is not yet ready (because of a transitively redundant edge) then + // this causes depth computation to be quadratic in the size of the DAG. + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) + PendingQueue.push_back(SuccSU); +} + +/// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. +void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + ReleaseSucc(SU, &*I); + } +} + +/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && + "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + ReleaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue.scheduledNode(SU); +} + +/// emitNoop - Add a noop to the current instruction sequence. +void SchedulePostRATDList::emitNoop(unsigned CurCycle) { + DEBUG(dbgs() << "*** Emitting noop in cycle " << CurCycle << '\n'); + HazardRec->EmitNoop(); + Sequence.push_back(nullptr); // NULL here means noop + ++NumNoops; +} + +/// ListScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void SchedulePostRATDList::ListScheduleTopDown() { + unsigned CurCycle = 0; + + // We're scheduling top-down but we're visiting the regions in + // bottom-up order, so we don't know the hazards at the start of a + // region. So assume no hazards (this should usually be ok as most + // blocks are a single region). + HazardRec->Reset(); + + // Release any successors of the special Entry node. + ReleaseSuccessors(&EntrySU); + + // Add all leaves to Available queue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) { + AvailableQueue.push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // In any cycle where we can't schedule any instructions, we must + // stall or emit a noop, depending on the target. + bool CycleHasInsts = false; + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. 
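+ // For example (illustrative numbers): if a predecessor scheduled in
+ // cycle 1 feeds a node through an edge of latency 3, that node's depth
+ // is at least 4, so it waits in PendingQueue and only becomes
+ // available once CurCycle reaches 4.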
+ std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + unsigned MinDepth = ~0u; + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() <= CurCycle) { + AvailableQueue.push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); + } + + DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this)); + + SUnit *FoundSUnit = nullptr, *NotPreferredSUnit = nullptr; + bool HasNoopHazards = false; + while (!AvailableQueue.empty()) { + SUnit *CurSUnit = AvailableQueue.pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + if (HazardRec->ShouldPreferAnother(CurSUnit)) { + if (!NotPreferredSUnit) { + // If this is the first non-preferred node for this cycle, then + // record it and continue searching for a preferred node. If this + // is not the first non-preferred node, then treat it as though + // there had been a hazard. + NotPreferredSUnit = CurSUnit; + continue; + } + } else { + FoundSUnit = CurSUnit; + break; + } + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // If we have a non-preferred node, push it back onto the available list. + // If we did not find a preferred node, then schedule this first + // non-preferred node. + if (NotPreferredSUnit) { + if (!FoundSUnit) { + DEBUG(dbgs() << "*** Will schedule a non-preferred instruction...\n"); + FoundSUnit = NotPreferredSUnit; + } else { + AvailableQueue.push(NotPreferredSUnit); + } + + NotPreferredSUnit = nullptr; + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue.push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule... + if (FoundSUnit) { + // If we need to emit noops prior to this instruction, then do so. + unsigned NumPreNoops = HazardRec->PreEmitNoops(FoundSUnit); + for (unsigned i = 0; i != NumPreNoops; ++i) + emitNoop(CurCycle); + + // ... schedule the node... + ScheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + CycleHasInsts = true; + if (HazardRec->atIssueLimit()) { + DEBUG(dbgs() << "*** Max instructions per cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++CurCycle; + CycleHasInsts = false; + } + } else { + if (CycleHasInsts) { + DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, + // just advance the current cycle and try again. + DEBUG(dbgs() << "*** Stall in cycle " << CurCycle << '\n'); + HazardRec->AdvanceCycle(); + ++NumStalls; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. 
+ emitNoop(CurCycle); + } + + ++CurCycle; + CycleHasInsts = false; + } + } + +#ifndef NDEBUG + unsigned ScheduledNodes = VerifyScheduledDAG(/*isBottomUp=*/false); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +#endif // NDEBUG +} + +// EmitSchedule - Emit the machine code in scheduled order. +void SchedulePostRATDList::EmitSchedule() { + RegionBegin = RegionEnd; + + // If first instruction was a DBG_VALUE then put it back. + if (FirstDbgValue) + BB->splice(RegionEnd, BB, FirstDbgValue); + + // Then re-insert them according to the given schedule. + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + BB->splice(RegionEnd, BB, SU->getInstr()); + else + // Null SUnit* is a noop. + TII->insertNoop(*BB, RegionEnd); + + // Update the Begin iterator, as the first instruction in the block + // may have been scheduled later. + if (i == 0) + RegionBegin = std::prev(RegionEnd); + } + + // Reinsert any remaining debug_values. + for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator + DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) { + std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI); + MachineInstr *DbgValue = P.first; + MachineBasicBlock::iterator OrigPrivMI = P.second; + BB->splice(++OrigPrivMI, BB, DbgValue); + } + DbgValues.clear(); + FirstDbgValue = nullptr; +} diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp new file mode 100644 index 0000000..d27ea2f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -0,0 +1,168 @@ +//===---------------------- ProcessImplicitDefs.cpp -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "processimplicitdefs" + +namespace { +/// Process IMPLICIT_DEF instructions and make sure there is one implicit_def +/// for each use. Add isUndef marker to implicit_def defs and their uses. 
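+/// For example (illustrative vregs):
+///   %vreg0 = IMPLICIT_DEF
+///   %vreg1 = COPY %vreg0
+/// turns the COPY into an IMPLICIT_DEF of %vreg1, marks any remaining
+/// uses of %vreg0 as <undef>, and erases the original IMPLICIT_DEF.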
+class ProcessImplicitDefs : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + SmallSetVector<MachineInstr*, 16> WorkList; + + void processImplicitDef(MachineInstr *MI); + bool canTurnIntoImplicitDef(MachineInstr *MI); + +public: + static char ID; + + ProcessImplicitDefs() : MachineFunctionPass(ID) { + initializeProcessImplicitDefsPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &au) const override; + + bool runOnMachineFunction(MachineFunction &fn) override; +}; +} // end anonymous namespace + +char ProcessImplicitDefs::ID = 0; +char &llvm::ProcessImplicitDefsID = ProcessImplicitDefs::ID; + +INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) +INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs", + "Process Implicit Definitions", false, false) + +void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<AAResultsWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) { + if (!MI->isCopyLike() && + !MI->isInsertSubreg() && + !MI->isRegSequence() && + !MI->isPHI()) + return false; + for (const MachineOperand &MO : MI->operands()) + if (MO.isReg() && MO.isUse() && MO.readsReg()) + return false; + return true; +} + +void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { + DEBUG(dbgs() << "Processing " << *MI); + unsigned Reg = MI->getOperand(0).getReg(); + + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // For virtual registers, mark all uses as <undef>, and convert users to + // implicit-def when possible. + for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { + MO.setIsUndef(); + MachineInstr *UserMI = MO.getParent(); + if (!canTurnIntoImplicitDef(UserMI)) + continue; + DEBUG(dbgs() << "Converting to IMPLICIT_DEF: " << *UserMI); + UserMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + WorkList.insert(UserMI); + } + MI->eraseFromParent(); + return; + } + + // This is a physreg implicit-def. + // Look for the first instruction to use or define an alias. + MachineBasicBlock::instr_iterator UserMI = MI->getIterator(); + MachineBasicBlock::instr_iterator UserE = MI->getParent()->instr_end(); + bool Found = false; + for (++UserMI; UserMI != UserE; ++UserMI) { + for (MachineOperand &MO : UserMI->operands()) { + if (!MO.isReg()) + continue; + unsigned UserReg = MO.getReg(); + if (!TargetRegisterInfo::isPhysicalRegister(UserReg) || + !TRI->regsOverlap(Reg, UserReg)) + continue; + // UserMI uses or redefines Reg. Set <undef> flags on all uses. + Found = true; + if (MO.isUse()) + MO.setIsUndef(); + } + if (Found) + break; + } + + // If we found the using MI, we can erase the IMPLICIT_DEF. + if (Found) { + DEBUG(dbgs() << "Physreg user: " << *UserMI); + MI->eraseFromParent(); + return; + } + + // Using instr wasn't found, it could be in another block. + // Leave the physreg IMPLICIT_DEF, but trim any extra operands. + for (unsigned i = MI->getNumOperands() - 1; i; --i) + MI->RemoveOperand(i); + DEBUG(dbgs() << "Keeping physreg: " << *MI); +} + +/// processImplicitDefs - Process IMPLICIT_DEF instructions and turn them into +/// <undef> operands. 
+bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &MF) { + + DEBUG(dbgs() << "********** PROCESS IMPLICIT DEFS **********\n" + << "********** Function: " << MF.getName() << '\n'); + + bool Changed = false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + assert(MRI->isSSA() && "ProcessImplicitDefs only works on SSA form."); + assert(WorkList.empty() && "Inconsistent worklist state"); + + for (MachineFunction::iterator MFI = MF.begin(), MFE = MF.end(); + MFI != MFE; ++MFI) { + // Scan the basic block for implicit defs. + for (MachineBasicBlock::instr_iterator MBBI = MFI->instr_begin(), + MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) + if (MBBI->isImplicitDef()) + WorkList.insert(&*MBBI); + + if (WorkList.empty()) + continue; + + DEBUG(dbgs() << "BB#" << MFI->getNumber() << " has " << WorkList.size() + << " implicit defs.\n"); + Changed = true; + + // Drain the WorkList to recursively process any new implicit defs. + do processImplicitDef(WorkList.pop_back_val()); + while (!WorkList.empty()); + } + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp new file mode 100644 index 0000000..939c500 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -0,0 +1,1050 @@ +//===-- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is responsible for finalizing the functions frame layout, saving +// callee saved registers, and for emitting prolog & epilog code for the +// function. +// +// This pass must be run after register allocation. After this pass is +// executed, it is illegal to construct MO_FrameIndex operands. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <climits> + +using namespace llvm; + +#define DEBUG_TYPE "pei" + +namespace { +class PEI : public MachineFunctionPass { +public: + static char ID; + PEI() : MachineFunctionPass(ID) { + initializePEIPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract + /// frame indexes with appropriate references. + /// + bool runOnMachineFunction(MachineFunction &Fn) override; + +private: + RegScavenger *RS; + + // MinCSFrameIndex, MaxCSFrameIndex - Keeps the range of callee saved + // stack frame indexes. + unsigned MinCSFrameIndex, MaxCSFrameIndex; + + // Save and Restore blocks of the current function. Typically there is a + // single save block, unless Windows EH funclets are involved. + SmallVector<MachineBasicBlock *, 1> SaveBlocks; + SmallVector<MachineBasicBlock *, 4> RestoreBlocks; + + // Flag to control whether to use the register scavenger to resolve + // frame index materialization registers. Set according to + // TRI->requiresFrameIndexScavenging() for the current function. 
+ bool FrameIndexVirtualScavenging; + + void calculateSets(MachineFunction &Fn); + void calculateCallsInformation(MachineFunction &Fn); + void assignCalleeSavedSpillSlots(MachineFunction &Fn, + const BitVector &SavedRegs); + void insertCSRSpillsAndRestores(MachineFunction &Fn); + void calculateFrameObjectOffsets(MachineFunction &Fn); + void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); + void scavengeFrameVirtualRegs(MachineFunction &Fn); + void insertPrologEpilogCode(MachineFunction &Fn); +}; +} // namespace + +char PEI::ID = 0; +char &llvm::PrologEpilogCodeInserterID = PEI::ID; + +static cl::opt<unsigned> +WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" + " number")); + +INITIALIZE_PASS_BEGIN(PEI, "prologepilog", + "Prologue/Epilogue Insertion", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_END(PEI, "prologepilog", + "Prologue/Epilogue Insertion & Frame Finalization", + false, false) + +STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); +STATISTIC(NumBytesStackSpace, + "Number of bytes used for stack in all functions"); + +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<StackProtector>(); + AU.addRequired<TargetPassConfig>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Compute the set of return blocks +void PEI::calculateSets(MachineFunction &Fn) { + const MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Even when we do not change any CSR, we still want to insert the + // prologue and epilogue of the function. + // So set the save points for those. + + // Use the points found by shrink-wrapping, if any. + if (MFI->getSavePoint()) { + SaveBlocks.push_back(MFI->getSavePoint()); + assert(MFI->getRestorePoint() && "Both restore and save must be set"); + MachineBasicBlock *RestoreBlock = MFI->getRestorePoint(); + // If RestoreBlock does not have any successor and is not a return block + // then the end point is unreachable and we do not need to insert any + // epilogue. + if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock()) + RestoreBlocks.push_back(RestoreBlock); + return; + } + + // Save refs to entry and return blocks. + SaveBlocks.push_back(&Fn.front()); + for (MachineBasicBlock &MBB : Fn) { + if (MBB.isEHFuncletEntry()) + SaveBlocks.push_back(&MBB); + if (MBB.isReturnBlock()) + RestoreBlocks.push_back(&MBB); + } +} + +/// StackObjSet - A set of stack object indexes +typedef SmallSetVector<int, 8> StackObjSet; + +/// runOnMachineFunction - Insert prolog/epilog code and replace abstract +/// frame indexes with appropriate references. +/// +bool PEI::runOnMachineFunction(MachineFunction &Fn) { + const Function* F = Fn.getFunction(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + + assert(!Fn.getRegInfo().getNumVirtRegs() && "Regalloc must assign all vregs"); + + RS = TRI->requiresRegisterScavenging(Fn) ? 
new RegScavenger() : nullptr; + FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn); + + // Calculate the MaxCallFrameSize and AdjustsStack variables for the + // function's frame information. Also eliminates call frame pseudo + // instructions. + calculateCallsInformation(Fn); + + // Determine which of the registers in the callee save list should be saved. + BitVector SavedRegs; + TFI->determineCalleeSaves(Fn, SavedRegs, RS); + + // Insert spill code for any callee saved registers that are modified. + assignCalleeSavedSpillSlots(Fn, SavedRegs); + + // Determine placement of CSR spill/restore code: + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); + + // Add the code to save and restore the callee saved registers. + if (!F->hasFnAttribute(Attribute::Naked)) + insertCSRSpillsAndRestores(Fn); + + // Allow the target machine to make final modifications to the function + // before the frame layout is finalized. + TFI->processFunctionBeforeFrameFinalized(Fn, RS); + + // Calculate actual frame offsets for all abstract stack objects... + calculateFrameObjectOffsets(Fn); + + // Add prolog and epilog code to the function. This function is required + // to align the stack frame as necessary for any stack variables or + // called functions. Because of this, calculateCalleeSavedRegisters() + // must be called before this function in order to set the AdjustsStack + // and MaxCallFrameSize variables. + if (!F->hasFnAttribute(Attribute::Naked)) + insertPrologEpilogCode(Fn); + + // Replace all MO_FrameIndex operands with physical register references + // and actual offsets. + // + replaceFrameIndices(Fn); + + // If register scavenging is needed, as we've enabled doing it as a + // post-pass, scavenge the virtual registers that frame index elimination + // inserted. + if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) + scavengeFrameVirtualRegs(Fn); + + // Clear any vregs created by virtual scavenging. + Fn.getRegInfo().clearVirtRegs(); + + // Warn on stack size when we exceeds the given limit. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + uint64_t StackSize = MFI->getStackSize(); + if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) { + DiagnosticInfoStackSize DiagStackSize(*F, StackSize); + F->getContext().diagnose(DiagStackSize); + } + + delete RS; + SaveBlocks.clear(); + RestoreBlocks.clear(); + return true; +} + +/// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack +/// variables for the function's frame information and eliminate call frame +/// pseudo instructions. +void PEI::calculateCallsInformation(MachineFunction &Fn) { + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + unsigned MaxCallFrameSize = 0; + bool AdjustsStack = MFI->adjustsStack(); + + // Get the function call frame set-up and tear-down instruction opcode + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + + // Early exit for targets which have no call frame setup/destroy pseudo + // instructions. 
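+ // On targets that do have them, the sequence looks like (x86-style
+ // names, purely illustrative):
+ //   ADJCALLSTACKDOWN 16
+ //   ...call setup and the call itself...
+ //   ADJCALLSTACKUP 16
+ // The loop below records MaxCallFrameSize = 16 and, if the target can
+ // simplify the pseudos, erases them again.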
+ if (FrameSetupOpcode == ~0u && FrameDestroyOpcode == ~0u) + return; + + std::vector<MachineBasicBlock::iterator> FrameSDOps; + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + assert(I->getNumOperands() >= 1 && "Call Frame Setup/Destroy Pseudo" + " instructions should have a single immediate argument!"); + unsigned Size = I->getOperand(0).getImm(); + if (Size > MaxCallFrameSize) MaxCallFrameSize = Size; + AdjustsStack = true; + FrameSDOps.push_back(I); + } else if (I->isInlineAsm()) { + // Some inline asm's need a stack frame, as indicated by operand 1. + unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + AdjustsStack = true; + } + + MFI->setAdjustsStack(AdjustsStack); + MFI->setMaxCallFrameSize(MaxCallFrameSize); + + for (std::vector<MachineBasicBlock::iterator>::iterator + i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) { + MachineBasicBlock::iterator I = *i; + + // If call frames are not being included as part of the stack frame, and + // the target doesn't indicate otherwise, remove the call frame pseudos + // here. The sub/add sp instruction pairs are still inserted, but we don't + // need to track the SP adjustment for frame index elimination. + if (TFI->canSimplifyCallFramePseudos(Fn)) + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + } +} + +void PEI::assignCalleeSavedSpillSlots(MachineFunction &F, + const BitVector &SavedRegs) { + // These are used to keep track the callee-save area. Initialize them. + MinCSFrameIndex = INT_MAX; + MaxCSFrameIndex = 0; + + if (SavedRegs.empty()) + return; + + const TargetRegisterInfo *RegInfo = F.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&F); + + std::vector<CalleeSavedInfo> CSI; + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (SavedRegs.test(Reg)) + CSI.push_back(CalleeSavedInfo(Reg)); + } + + const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. + + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! + + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); + I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. 
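+ // For example (illustrative numbers): a register class with a 16-byte
+ // spill size and 16-byte preferred alignment on a target whose stack
+ // is only 8-byte aligned gets std::min(16, 8) = 8 as the slot
+ // alignment; the slot size stays RC->getSize().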
+ unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } + + I->setFrameIdx(FrameIdx); + } + } + + MFI->setCalleeSavedInfo(CSI); +} + +/// Helper function to update the liveness information for the callee-saved +/// registers. +static void updateLiveness(MachineFunction &MF) { + MachineFrameInfo *MFI = MF.getFrameInfo(); + // Visited will contain all the basic blocks that are in the region + // where the callee saved registers are alive: + // - Anything that is not Save or Restore -> LiveThrough. + // - Save -> LiveIn. + // - Restore -> LiveOut. + // The live-out is not attached to the block, so no need to keep + // Restore in this set. + SmallPtrSet<MachineBasicBlock *, 8> Visited; + SmallVector<MachineBasicBlock *, 8> WorkList; + MachineBasicBlock *Entry = &MF.front(); + MachineBasicBlock *Save = MFI->getSavePoint(); + + if (!Save) + Save = Entry; + + if (Entry != Save) { + WorkList.push_back(Entry); + Visited.insert(Entry); + } + Visited.insert(Save); + + MachineBasicBlock *Restore = MFI->getRestorePoint(); + if (Restore) + // By construction Restore cannot be visited, otherwise it + // means there exists a path to Restore that does not go + // through Save. + WorkList.push_back(Restore); + + while (!WorkList.empty()) { + const MachineBasicBlock *CurBB = WorkList.pop_back_val(); + // By construction, the region that is after the save point is + // dominated by the Save and post-dominated by the Restore. + if (CurBB == Save && Save != Restore) + continue; + // Enqueue all the successors not already visited. + // Those are by construction either before Save or after Restore. + for (MachineBasicBlock *SuccBB : CurBB->successors()) + if (Visited.insert(SuccBB).second) + WorkList.push_back(SuccBB); + } + + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + for (MachineBasicBlock *MBB : Visited) { + MCPhysReg Reg = CSI[i].getReg(); + // Add the callee-saved register as live-in. + // It's killed at the spill. + if (!MBB->isLiveIn(Reg)) + MBB->addLiveIn(Reg); + } + } +} + +/// insertCSRSpillsAndRestores - Insert spill and restore code for +/// callee saved registers used in the function. +/// +void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { + // Get callee saved register information. + MachineFrameInfo *MFI = Fn.getFrameInfo(); + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + + MFI->setCalleeSavedInfoValid(true); + + // Early exit if no callee saved registers are modified! + if (CSI.empty()) + return; + + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo(); + MachineBasicBlock::iterator I; + + // Spill using target interface. 
+ for (MachineBasicBlock *SaveBlock : SaveBlocks) { + I = SaveBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*SaveBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + // Insert the spill to the stack frame. + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*SaveBlock, I, Reg, true, CSI[i].getFrameIdx(), + RC, TRI); + } + } + // Update the live-in information of all the blocks up to the save point. + updateLiveness(Fn); + } + + // Restore using target interface. + for (MachineBasicBlock *MBB : RestoreBlocks) { + I = MBB->end(); + + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; + + bool AtStart = I == MBB->begin(); + MachineBasicBlock::iterator BeforeI = I; + if (!AtStart) + --BeforeI; + + // Restore all registers immediately before the return and any + // terminators that precede it. + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } + } + } + } +} + +/// AdjustStackOffset - Helper function used to adjust the stack frame offset. +static inline void +AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, + bool StackGrowsDown, int64_t &Offset, + unsigned &MaxAlign, unsigned Skew) { + // If the stack grows down, add the object size to find the lowest address. + if (StackGrowsDown) + Offset += MFI->getObjectSize(FrameIdx); + + unsigned Align = MFI->getObjectAlignment(FrameIdx); + + // If the alignment of this object is greater than that of the stack, then + // increase the stack alignment to match. + MaxAlign = std::max(MaxAlign, Align); + + // Adjust to alignment boundary. + Offset = RoundUpToAlignment(Offset, Align, Skew); + + if (StackGrowsDown) { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset + } else { + DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n"); + MFI->setObjectOffset(FrameIdx, Offset); + Offset += MFI->getObjectSize(FrameIdx); + } +} + +/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e., +/// those required to be close to the Stack Protector) to stack offsets. +static void +AssignProtectedObjSet(const StackObjSet &UnassignedObjs, + SmallSet<int, 16> &ProtectedObjs, + MachineFrameInfo *MFI, bool StackGrowsDown, + int64_t &Offset, unsigned &MaxAlign, unsigned Skew) { + + for (StackObjSet::const_iterator I = UnassignedObjs.begin(), + E = UnassignedObjs.end(); I != E; ++I) { + int i = *I; + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); + ProtectedObjs.insert(i); + } +} + +/// calculateFrameObjectOffsets - Calculate actual frame offsets for all of the +/// abstract stack objects. 
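+///
+/// Illustrative sketch (not part of the original code; hypothetical numbers):
+/// for a downward-growing stack, the AdjustStackOffset helper above walks a
+/// running Offset away from the incoming stack pointer. Given Offset = 20 and
+/// an 8-byte object with 16-byte alignment and no skew, it first adds the
+/// object size (Offset = 28), raises MaxAlign to at least 16, rounds up to
+/// the alignment boundary (Offset = 32), and records the object at SP[-32].
+/// For an upward-growing stack the rounding happens first and the size is
+/// added afterwards.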
+/// +void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + StackProtector *SP = &getAnalysis<StackProtector>(); + + bool StackGrowsDown = + TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + // Loop over all of the stack objects, assigning sequential addresses... + MachineFrameInfo *MFI = Fn.getFrameInfo(); + + // Start at the beginning of the local area. + // The Offset is the distance from the stack top in the direction + // of stack growth -- so it's always nonnegative. + int LocalAreaOffset = TFI.getOffsetOfLocalArea(); + if (StackGrowsDown) + LocalAreaOffset = -LocalAreaOffset; + assert(LocalAreaOffset >= 0 + && "Local area offset should be in direction of stack growth"); + int64_t Offset = LocalAreaOffset; + + // Skew to be applied to alignment. + unsigned Skew = TFI.getStackAlignmentSkew(Fn); + + // If there are fixed sized objects that are preallocated in the local area, + // non-fixed objects can't be allocated right at the start of local area. + // We currently don't support filling in holes in between fixed sized + // objects, so we adjust 'Offset' to point to the end of last fixed sized + // preallocated object. + for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) { + int64_t FixedOff; + if (StackGrowsDown) { + // The maximum distance from the stack pointer is at lower address of + // the object -- which is given by offset. For down growing stack + // the offset is negative, so we negate the offset to get the distance. + FixedOff = -MFI->getObjectOffset(i); + } else { + // The maximum distance from the start pointer is at the upper + // address of the object. + FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i); + } + if (FixedOff > Offset) Offset = FixedOff; + } + + // First assign frame offsets to stack objects that are used to spill + // callee saved registers. + if (StackGrowsDown) { + for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) { + // If the stack grows down, we need to add the size to find the lowest + // address of the object. + Offset += MFI->getObjectSize(i); + + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = RoundUpToAlignment(Offset, Align, Skew); + + MFI->setObjectOffset(i, -Offset); // Set the computed offset + } + } else { + int MaxCSFI = MaxCSFrameIndex, MinCSFI = MinCSFrameIndex; + for (int i = MaxCSFI; i >= MinCSFI ; --i) { + unsigned Align = MFI->getObjectAlignment(i); + // Adjust to alignment boundary + Offset = RoundUpToAlignment(Offset, Align, Skew); + + MFI->setObjectOffset(i, Offset); + Offset += MFI->getObjectSize(i); + } + } + + unsigned MaxAlign = MFI->getMaxAlignment(); + + // Make sure the special register scavenging spill slot is closest to the + // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. 
+ const TargetRegisterInfo *RegInfo = Fn.getSubtarget().getRegisterInfo(); + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + } + + // FIXME: Once this is working, then enable flag will change to a target + // check for whether the frame is large enough to want to use virtual + // frame index registers. Functions which don't want/need this optimization + // will continue to use the existing code path. + if (MFI->getUseLocalStackAllocationBlock()) { + unsigned Align = MFI->getLocalFrameMaxAlign(); + + // Adjust to alignment boundary. + Offset = RoundUpToAlignment(Offset, Align, Skew); + + DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n"); + + // Resolve offsets for objects in the local block. + for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) { + std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i); + int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second; + DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" << + FIOffset << "]\n"); + MFI->setObjectOffset(Entry.first, FIOffset); + } + // Allocate the local block + Offset += MFI->getLocalFrameSize(); + + MaxAlign = std::max(Align, MaxAlign); + } + + // Make sure that the stack protector comes before the local variables on the + // stack. + SmallSet<int, 16> ProtectedObjs; + if (MFI->getStackProtectorIndex() >= 0) { + StackObjSet LargeArrayObjs; + StackObjSet SmallArrayObjs; + StackObjSet AddrOfObjs; + + AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown, + Offset, MaxAlign, Skew); + + // Assign large stack objects first. + for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + + switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) { + case StackProtector::SSPLK_None: + continue; + case StackProtector::SSPLK_SmallArray: + SmallArrayObjs.insert(i); + continue; + case StackProtector::SSPLK_AddrOf: + AddrOfObjs.insert(i); + continue; + case StackProtector::SSPLK_LargeArray: + LargeArrayObjs.insert(i); + continue; + } + llvm_unreachable("Unexpected SSPLayoutKind."); + } + + AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + AssignProtectedObjSet(AddrOfObjs, ProtectedObjs, MFI, StackGrowsDown, + Offset, MaxAlign, Skew); + } + + // Then assign frame offsets to stack objects that are not used to spill + // callee saved registers. 
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) { + if (MFI->isObjectPreAllocated(i) && + MFI->getUseLocalStackAllocationBlock()) + continue; + if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) + continue; + if (RS && RS->isScavengingFrameIndex((int)i)) + continue; + if (MFI->isDeadObjectIndex(i)) + continue; + if (MFI->getStackProtectorIndex() == (int)i) + continue; + if (ProtectedObjs.count(i)) + continue; + + AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign, Skew); + } + + // Make sure the special register scavenging spill slot is closest to the + // stack pointer. + if (RS && !EarlyScavengingSlots) { + SmallVector<int, 2> SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVectorImpl<int>::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign, Skew); + } + + if (!TFI.targetHandlesStackFrameRounding()) { + // If we have reserved argument space for call sites in the function + // immediately on entry to the current function, count it as part of the + // overall stack size. + if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn)) + Offset += MFI->getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (MFI->adjustsStack() || MFI->hasVarSizedObjects() || + (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0)) + StackAlign = TFI.getStackAlignment(); + else + StackAlign = TFI.getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. + StackAlign = std::max(StackAlign, MaxAlign); + Offset = RoundUpToAlignment(Offset, StackAlign, Skew); + } + + // Update frame info to pretend that this is part of the stack... + int64_t StackSize = Offset - LocalAreaOffset; + MFI->setStackSize(StackSize); + NumBytesStackSpace += StackSize; +} + +/// insertPrologEpilogCode - Scan the function for modified callee saved +/// registers, insert spill code for these callee saved registers, then add +/// prolog and epilog code to the function. +/// +void PEI::insertPrologEpilogCode(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + + // Add prologue to the function... + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.emitPrologue(Fn, *SaveBlock); + + // Add epilogue to restore the callee-save registers in each exiting block. + for (MachineBasicBlock *RestoreBlock : RestoreBlocks) + TFI.emitEpilogue(Fn, *RestoreBlock); + + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.inlineStackProbe(Fn, *SaveBlock); + + // Emit additional code that is required to support segmented stacks, if + // we've been asked for it. This, when linked with a runtime with support + // for segmented stacks (libgcc is one), will result in allocating stack + // space in small chunks instead of one large contiguous block. + if (Fn.shouldSplitStack()) { + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForSegmentedStacks(Fn, *SaveBlock); + } + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. 
The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + for (MachineBasicBlock *SaveBlock : SaveBlocks) + TFI.adjustForHiPEPrologue(Fn, *SaveBlock); +} + +/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical +/// register references and actual offsets. +/// +void PEI::replaceFrameIndices(MachineFunction &Fn) { + const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering(); + if (!TFI.needsFrameIndexResolution(Fn)) return; + + // Store SPAdj at exit of a basic block. + SmallVector<int, 8> SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + + // Iterate over the reachable blocks in DFS order. + for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (auto &BB : Fn) { + if (Reachable.count(&BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(&BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { + assert(Fn.getSubtarget().getRegisterInfo() && + "getRegisterInfo() must be implemented!"); + const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo(); + const TargetRegisterInfo &TRI = *Fn.getSubtarget().getRegisterInfo(); + const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering(); + unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + + bool InsideCallSequence = false; + + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + InsideCallSequence = (I->getOpcode() == FrameSetupOpcode); + SPAdj += TII.getSPAdjust(I); + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = std::prev(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); + + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = std::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indices in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. 
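+      //
+      // Schematic example (hypothetical operands, not from the original
+      // source): a "DBG_VALUE <fi#3>, 0, !var, !expr" is rewritten by the
+      // code below into something like
+      // "DBG_VALUE %FrameReg, <old offset + frame offset of fi#3>, !var, !expr"
+      // using the register and offset that TFI->getFrameIndexReference()
+      // reports, so debug info can still locate the variable once frame
+      // indices are gone.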
+ if (MI->isDebugValue()) { + assert(i == 0 && "Frame indices can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // TODO: This code should be commoned with the code for + // PATCHPOINT. There's no good reason for the difference in + // implementation other than historical accident. The only + // remaining difference is the unconditional use of the stack + // pointer as the base register. + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + assert((!MI->isDebugValue() || i == 0) && + "Frame indicies can only appear as the first operand of a " + "DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(i + 1); + const unsigned refOffset = + TFI->getFrameIndexReferenceFromSP(Fn, MI->getOperand(i).getIndex(), + Reg); + + Offset.setImm(Offset.getImm() + refOffset); + MI->getOperand(i).ChangeToRegister(Reg, false /*isDef*/); + continue; + } + + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? nullptr : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } + + MI = nullptr; + break; + } + + // If we are looking at a call sequence, we need to keep track of + // the SP adjustment made by each instruction in the sequence. + // This includes both the frame setup/destroy pseudos (handled above), + // as well as other instructions that have side effects w.r.t the SP. + // Note that this must come after eliminateFrameIndex, because + // if I itself referred to a frame index, we shouldn't count its own + // adjustment. + if (MI && InsideCallSequence) + SPAdj += TII.getSPAdjust(MI); + + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + } +} + +/// scavengeFrameVirtualRegs - Replace all frame index virtual registers +/// with physical registers. Use the register scavenger to find an +/// appropriate register to use. +/// +/// FIXME: Iterating over the instruction stream is unnecessary. We can simply +/// iterate over the vreg use list, which at this point only contains machine +/// operands for which eliminateFrameIndex need a new scratch reg. +void +PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { + // Run through the instructions and find any virtual registers. + for (MachineFunction::iterator BB = Fn.begin(), + E = Fn.end(); BB != E; ++BB) { + RS->enterBasicBlock(&*BB); + + int SPAdj = 0; + + // The instruction stream may change in the loop, so check BB->end() + // directly. 
+ for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + // We might end up here again with a NULL iterator if we scavenged a + // register for which we inserted spill code for definition by what was + // originally the first instruction in BB. + if (I == MachineBasicBlock::iterator(nullptr)) + I = BB->begin(); + + MachineInstr *MI = I; + MachineBasicBlock::iterator J = std::next(I); + MachineBasicBlock::iterator P = + I == BB->begin() ? MachineBasicBlock::iterator(nullptr) + : std::prev(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MI->getOperand(i).isReg()) { + MachineOperand &MO = MI->getOperand(i); + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // When we first encounter a new virtual register, it + // must be a definition. + assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + + // Replace this reference to the virtual register with the + // scratch register. + assert (ScratchReg && "Missing scratch register!"); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS->setRegUsed(ScratchReg); + } + } + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != std::prev(J)) { + BB->splice(J, &*BB, I); + + // Before we move I, we need to prepare the RS to visit I again. + // Specifically, RS will assert if it sees uses of registers that + // it believes are undefined. Because we have already processed + // register kills in I, when it visits I again, it will believe that + // those registers are undefined. To avoid this situation, unprocess + // the instruction I. + assert(RS->getCurrentPosition() == I && + "The register scavenger has an unexpected position"); + I = P; + RS->unprocess(P); + } else + ++I; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp new file mode 100644 index 0000000..1f46417 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/PseudoSourceValue.cpp @@ -0,0 +1,142 @@ +//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the PseudoSourceValue class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include <map> +using namespace llvm; + +static const char *const PSVNames[] = { + "Stack", "GOT", "JumpTable", "ConstantPool", "FixedStack", + "GlobalValueCallEntry", "ExternalSymbolCallEntry"}; + +PseudoSourceValue::PseudoSourceValue(PSVKind Kind) : Kind(Kind) {} + +PseudoSourceValue::~PseudoSourceValue() {} + +void PseudoSourceValue::printCustom(raw_ostream &O) const { + O << PSVNames[Kind]; +} + +bool PseudoSourceValue::isConstant(const MachineFrameInfo *) const { + if (isStack()) + return false; + if (isGOT() || isConstantPool() || isJumpTable()) + return true; + llvm_unreachable("Unknown PseudoSourceValue!"); +} + +bool PseudoSourceValue::isAliased(const MachineFrameInfo *) const { + if (isStack() || isGOT() || isConstantPool() || isJumpTable()) + return false; + llvm_unreachable("Unknown PseudoSourceValue!"); +} + +bool PseudoSourceValue::mayAlias(const MachineFrameInfo *) const { + return !(isGOT() || isConstantPool() || isJumpTable()); +} + +bool FixedStackPseudoSourceValue::isConstant( + const MachineFrameInfo *MFI) const { + return MFI && MFI->isImmutableObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::isAliased(const MachineFrameInfo *MFI) const { + if (!MFI) + return true; + return MFI->isAliasedObjectIndex(FI); +} + +bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const { + if (!MFI) + return true; + // Spill slots will not alias any LLVM IR value. 
+ return !MFI->isSpillSlotObjectIndex(FI); +} + +void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const { + OS << "FixedStack" << FI; +} + +CallEntryPseudoSourceValue::CallEntryPseudoSourceValue(PSVKind Kind) + : PseudoSourceValue(Kind) {} + +bool CallEntryPseudoSourceValue::isConstant(const MachineFrameInfo *) const { + return false; +} + +bool CallEntryPseudoSourceValue::isAliased(const MachineFrameInfo *) const { + return false; +} + +bool CallEntryPseudoSourceValue::mayAlias(const MachineFrameInfo *) const { + return false; +} + +GlobalValuePseudoSourceValue::GlobalValuePseudoSourceValue( + const GlobalValue *GV) + : CallEntryPseudoSourceValue(GlobalValueCallEntry), GV(GV) {} + +ExternalSymbolPseudoSourceValue::ExternalSymbolPseudoSourceValue(const char *ES) + : CallEntryPseudoSourceValue(ExternalSymbolCallEntry), ES(ES) {} + +PseudoSourceValueManager::PseudoSourceValueManager() + : StackPSV(PseudoSourceValue::Stack), GOTPSV(PseudoSourceValue::GOT), + JumpTablePSV(PseudoSourceValue::JumpTable), + ConstantPoolPSV(PseudoSourceValue::ConstantPool) {} + +const PseudoSourceValue *PseudoSourceValueManager::getStack() { + return &StackPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getGOT() { return &GOTPSV; } + +const PseudoSourceValue *PseudoSourceValueManager::getConstantPool() { + return &ConstantPoolPSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getJumpTable() { + return &JumpTablePSV; +} + +const PseudoSourceValue *PseudoSourceValueManager::getFixedStack(int FI) { + std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI]; + if (!V) + V = llvm::make_unique<FixedStackPseudoSourceValue>(FI); + return V.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) { + std::unique_ptr<const GlobalValuePseudoSourceValue> &E = + GlobalCallEntries[GV]; + if (!E) + E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV); + return E.get(); +} + +const PseudoSourceValue * +PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) { + std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E = + ExternalCallEntries[ES]; + if (!E) + E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES); + return E.get(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp new file mode 100644 index 0000000..16ff48e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -0,0 +1,155 @@ +//===-- RegAllocBase.cpp - Register Allocator Base Class ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class which provides common functionality +// for LiveIntervalUnion-based register allocators. 
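+//
+// Minimal usage sketch (assumed here for illustration, not part of the
+// original file): a derived allocator pass typically wires things up along
+// these lines inside its runOnMachineFunction:
+//
+//   RegAllocBase::init(getAnalysis<VirtRegMap>(),
+//                      getAnalysis<LiveIntervals>(),
+//                      getAnalysis<LiveRegMatrix>());
+//   allocatePhysRegs();  // drives the enqueue/dequeue/selectOrSplit overrides
+//
+// RegAllocBasic.cpp later in this change follows exactly this pattern.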
+// +//===----------------------------------------------------------------------===// + +#include "RegAllocBase.h" +#include "Spiller.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Target/TargetRegisterInfo.h" +#ifndef NDEBUG +#include "llvm/ADT/SparseBitVector.h" +#endif +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Timer.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumNewQueued , "Number of new live ranges queued"); + +// Temporary verification option until we can put verification inside +// MachineVerifier. +static cl::opt<bool, true> +VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), + cl::desc("Verify during register allocation")); + +const char RegAllocBase::TimerGroupName[] = "Register Allocation"; +bool RegAllocBase::VerifyEnabled = false; + +//===----------------------------------------------------------------------===// +// RegAllocBase Implementation +//===----------------------------------------------------------------------===// + +// Pin the vtable to this file. +void RegAllocBase::anchor() {} + +void RegAllocBase::init(VirtRegMap &vrm, + LiveIntervals &lis, + LiveRegMatrix &mat) { + TRI = &vrm.getTargetRegInfo(); + MRI = &vrm.getRegInfo(); + VRM = &vrm; + LIS = &lis; + Matrix = &mat; + MRI->freezeReservedRegs(vrm.getMachineFunction()); + RegClassInfo.runOnMachineFunction(vrm.getMachineFunction()); +} + +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. +void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (MRI->reg_nodbg_empty(Reg)) + continue; + enqueue(&LIS->getInterval(Reg)); + } +} + +// Top-level driver to manage the queue of unassigned VirtRegs and call the +// selectOrSplit implementation. +void RegAllocBase::allocatePhysRegs() { + seedLiveRegs(); + + // Continue assigning vregs one at a time to available physical registers. + while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + aboutToRemoveInterval(*VirtReg); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. + Matrix->invalidateVirtRegs(); + + // selectOrSplit requests the allocator to return an available physical + // register if possible and populate a list of new live intervals that + // result from splitting. 
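+    //
+    // The contract as used below: a nonzero physical register is assigned
+    // directly; 0 means the live range was spilled or split and the new
+    // intervals arrive via SplitVRegs to be re-queued; ~0u means no register
+    // could be produced at all and is treated as an error (typically caused
+    // by inline asm). For example, RABasic returns ~0u only when VirtReg
+    // itself is not spillable.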
+ DEBUG(dbgs() << "\nselectOrSplit " + << TRI->getRegClassName(MRI->getRegClass(VirtReg->reg)) + << ':' << *VirtReg << " w=" << VirtReg->weight << '\n'); + typedef SmallVector<unsigned, 4> VirtRegVec; + VirtRegVec SplitVRegs; + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); + + if (AvailablePhysReg == ~0u) { + // selectOrSplit failed to find a register! + // Probably caused by an inline asm. + MachineInstr *MI = nullptr; + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *TmpMI = &*(I++); + if (TmpMI->isInlineAsm()) { + MI = TmpMI; + break; + } + } + if (MI) + MI->emitError("inline assembly requires more registers than available"); + else + report_fatal_error("ran out of registers during register allocation"); + // Keep going after reporting the error. + VRM->assignVirt2Phys(VirtReg->reg, + RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); + continue; + } + + if (AvailablePhysReg) + Matrix->assign(*VirtReg, AvailablePhysReg); + + for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); + I != E; ++I) { + LiveInterval *SplitVirtReg = &LIS->getInterval(*I); + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + aboutToRemoveInterval(*SplitVirtReg); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } + DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); + assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && + "expect split value in virtual register"); + enqueue(SplitVirtReg); + ++NumNewQueued; + } + } +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h new file mode 100644 index 0000000..659b8f5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -0,0 +1,112 @@ +//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RegAllocBase class, which is the skeleton of a basic +// register allocation algorithm and interface for extending it. It provides the +// building blocks on which to construct other experimental allocators and test +// the validity of two principles: +// +// - If virtual and physical register liveness is modeled using intervals, then +// on-the-fly interference checking is cheap. Furthermore, interferences can be +// lazily cached and reused. +// +// - Register allocation complexity, and generated code performance is +// determined by the effectiveness of live range splitting rather than optimal +// coloring. +// +// Following the first principle, interfering checking revolves around the +// LiveIntervalUnion data structure. +// +// To fulfill the second principle, the basic allocator provides a driver for +// incremental splitting. It essentially punts on the problem of register +// coloring, instead driving the assignment of virtual to physical registers by +// the cost of splitting. The basic allocator allows for heuristic reassignment +// of registers, if a more sophisticated allocator chooses to do that. +// +// This framework provides a way to engineer the compile time vs. 
code +// quality trade-off without relying on a particular theoretical solver. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_REGALLOCBASE_H +#define LLVM_LIB_CODEGEN_REGALLOCBASE_H + +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/RegisterClassInfo.h" + +namespace llvm { + +template<typename T> class SmallVectorImpl; +class TargetRegisterInfo; +class VirtRegMap; +class LiveIntervals; +class LiveRegMatrix; +class Spiller; + +/// RegAllocBase provides the register allocation driver and interface that can +/// be extended to add interesting heuristics. +/// +/// Register allocators must override the selectOrSplit() method to implement +/// live range splitting. They must also override enqueue/dequeue to provide an +/// assignment order. +class RegAllocBase { + virtual void anchor(); +protected: + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + VirtRegMap *VRM; + LiveIntervals *LIS; + LiveRegMatrix *Matrix; + RegisterClassInfo RegClassInfo; + + RegAllocBase() + : TRI(nullptr), MRI(nullptr), VRM(nullptr), LIS(nullptr), Matrix(nullptr) {} + + virtual ~RegAllocBase() {} + + // A RegAlloc pass should call this before allocatePhysRegs. + void init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat); + + // The top-level driver. The output is a VirtRegMap that us updated with + // physical register assignments. + void allocatePhysRegs(); + + // Get a temporary reference to a Spiller instance. + virtual Spiller &spiller() = 0; + + /// enqueue - Add VirtReg to the priority queue of unassigned registers. + virtual void enqueue(LiveInterval *LI) = 0; + + /// dequeue - Return the next unassigned register, or NULL. + virtual LiveInterval *dequeue() = 0; + + // A RegAlloc pass should override this to provide the allocation heuristics. + // Each call must guarantee forward progess by returning an available PhysReg + // or new set of split live virtual registers. It is up to the splitter to + // converge quickly toward fully spilled live ranges. + virtual unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &splitLVRs) = 0; + + // Use this group name for NamedRegionTimer. + static const char TimerGroupName[]; + + /// Method called when the allocator is about to remove a LiveInterval. + virtual void aboutToRemoveInterval(LiveInterval &LI) {} + +public: + /// VerifyEnabled - True when -verify-regalloc is given. + static bool VerifyEnabled; + +private: + void seedLiveRegs(); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp new file mode 100644 index 0000000..cfe367d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -0,0 +1,297 @@ +//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the RABasic function pass, which provides a minimal +// implementation of the basic register allocator. 
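+//
+// Worked example of the ordering (hypothetical weights, not from the source):
+// with live ranges A (weight 4.0), B (2.5) and C (0.5) queued, the
+// CompSpillWeight priority queue below hands out A first, then B, then C, so
+// the ranges that are most expensive to spill get first pick of the physical
+// registers.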
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "AllocationOrder.h" +#include "LiveDebugVariables.h" +#include "RegAllocBase.h" +#include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cstdlib> +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator", + createBasicRegisterAllocator); + +namespace { + struct CompSpillWeight { + bool operator()(LiveInterval *A, LiveInterval *B) const { + return A->weight < B->weight; + } + }; +} + +namespace { +/// RABasic provides a minimal implementation of the basic register allocation +/// algorithm. It prioritizes live virtual registers by spill weight and spills +/// whenever a register is unavailable. This is not practical in production but +/// provides a useful baseline both for measuring other allocators and comparing +/// the speed of the basic algorithm against other styles of allocators. +class RABasic : public MachineFunctionPass, public RegAllocBase +{ + // context + MachineFunction *MF; + + // state + std::unique_ptr<Spiller> SpillerInstance; + std::priority_queue<LiveInterval*, std::vector<LiveInterval*>, + CompSpillWeight> Queue; + + // Scratch space. Allocated here to avoid repeated malloc calls in + // selectOrSplit(). + BitVector UsableRegs; + +public: + RABasic(); + + /// Return the pass name. + const char* getPassName() const override { + return "Basic Register Allocator"; + } + + /// RABasic analysis usage. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + void releaseMemory() override; + + Spiller &spiller() override { return *SpillerInstance; } + + void enqueue(LiveInterval *LI) override { + Queue.push(LI); + } + + LiveInterval *dequeue() override { + if (Queue.empty()) + return nullptr; + LiveInterval *LI = Queue.top(); + Queue.pop(); + return LI; + } + + unsigned selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &SplitVRegs) override; + + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + // Helper for spilling all live virtual registers currently unified under preg + // that interfere with the most recently queried lvr. Return true if spilling + // was successful, and append any new spilled/split intervals to splitLVRs. 
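+  //
+  // Hypothetical illustration: if VirtReg has weight 3.0 and the register
+  // units of PhysReg are occupied by spillable intervals of weight 1.0 and
+  // 2.0, all of them are evicted and spilled; if any interference is
+  // unspillable or heavier than VirtReg, the candidate register is rejected
+  // instead.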
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<unsigned> &SplitVRegs); + + static char ID; +}; + +char RABasic::ID = 0; + +} // end anonymous namespace + +RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); +} + +void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequiredID(MachineDominatorsID); + AU.addPreservedID(MachineDominatorsID); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RABasic::releaseMemory() { + SpillerInstance.reset(); +} + + +// Spill or split all live virtual registers currently unified under PhysReg +// that interfere with VirtReg. The newly spilled or split live intervals are +// returned by appending them to SplitVRegs. +bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<unsigned> &SplitVRegs) { + // Record each interference and determine if all are spillable before mutating + // either the union or live intervals. + SmallVector<LiveInterval*, 8> Intfs; + + // Collect interferences assigned to any alias of the physical register. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + Q.collectInterferingVRegs(); + if (Q.seenUnspillableVReg()) + return false; + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + if (!Intf->isSpillable() || Intf->weight > VirtReg.weight) + return false; + Intfs.push_back(Intf); + } + } + DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) << + " interferences with " << VirtReg << "\n"); + assert(!Intfs.empty() && "expected interference"); + + // Spill each interfering vreg allocated to PhysReg or an alias. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval &Spill = *Intfs[i]; + + // Skip duplicates. + if (!VRM->hasPhys(Spill.reg)) + continue; + + // Deallocate the interfering vreg by removing it from the union. + // A LiveInterval instance may not be in a union during modification! + Matrix->unassign(Spill); + + // Spill the extracted interval. 
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM); + spiller().spill(LRE); + } + return true; +} + +// Driver for the register assignment and splitting heuristics. +// Manages iteration over the LiveIntervalUnions. +// +// This is a minimal implementation of register assignment and splitting that +// spills whenever we run out of registers. +// +// selectOrSplit can only be called once per live virtual register. We then do a +// single interference test for each register the correct class until we find an +// available register. So, the number of interference tests in the worst case is +// |vregs| * |machineregs|. And since the number of interference tests is +// minimal, there is no value in caching them outside the scope of +// selectOrSplit(). +unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &SplitVRegs) { + // Populate a list of physical register spill candidates. + SmallVector<unsigned, 8> PhysRegSpillCands; + + // Check for an available register in this class. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + while (unsigned PhysReg = Order.next()) { + // Check for interference in PhysReg + switch (Matrix->checkInterference(VirtReg, PhysReg)) { + case LiveRegMatrix::IK_Free: + // PhysReg is available, allocate it. + return PhysReg; + + case LiveRegMatrix::IK_VirtReg: + // Only virtual registers in the way, we may be able to spill them. + PhysRegSpillCands.push_back(PhysReg); + continue; + + default: + // RegMask or RegUnit interference. + continue; + } + } + + // Try to spill another interfering reg with less spill weight. + for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(), + PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) { + if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) + continue; + + assert(!Matrix->checkInterference(VirtReg, *PhysRegI) && + "Interference after spill."); + // Tell the caller to allocate to this newly freed physical register. + return *PhysRegI; + } + + // No other spill candidates were found, so spill the current VirtReg. + DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); + if (!VirtReg.isSpillable()) + return ~0u; + LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM); + spiller().spill(LRE); + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. 
+ return 0; +} + +bool RABasic::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n" + << "********** Function: " + << mf.getName() << '\n'); + + MF = &mf; + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); + + calculateSpillWeightsAndHints(*LIS, *MF, VRM, + getAnalysis<MachineLoopInfo>(), + getAnalysis<MachineBlockFrequencyInfo>()); + + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + + allocatePhysRegs(); + + // Diagnostic output before rewriting + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n"); + + releaseMemory(); + return true; +} + +FunctionPass* llvm::createBasicRegisterAllocator() +{ + return new RABasic(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp new file mode 100644 index 0000000..f4c076f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -0,0 +1,1109 @@ +//===-- RegAllocFast.cpp - A fast register allocator for debug code -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This register allocator allocates registers to a basic block at a time, +// attempting to keep values in registers and reusing registers as appropriate. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumStores, "Number of stores added"); +STATISTIC(NumLoads , "Number of loads added"); +STATISTIC(NumCopies, "Number of copies coalesced"); + +static RegisterRegAlloc + fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); + +namespace { + class RAFast : public MachineFunctionPass { + public: + static char ID; + RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1), + isBulkSpilling(false) {} + private: + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + RegisterClassInfo RegClassInfo; + + // Basic block currently being allocated. + MachineBasicBlock *MBB; + + // StackSlotForVirtReg - Maps virtual regs to the frame index where these + // values are spilled. + IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg; + + // Everything we know about a live virtual register. + struct LiveReg { + MachineInstr *LastUse; // Last instr to use reg. + unsigned VirtReg; // Virtual register number. 
+ unsigned PhysReg; // Currently held here. + unsigned short LastOpNum; // OpNum on LastUse. + bool Dirty; // Register needs spill. + + explicit LiveReg(unsigned v) + : LastUse(nullptr), VirtReg(v), PhysReg(0), LastOpNum(0), Dirty(false){} + + unsigned getSparseSetIndex() const { + return TargetRegisterInfo::virtReg2Index(VirtReg); + } + }; + + typedef SparseSet<LiveReg> LiveRegMap; + + // LiveVirtRegs - This map contains entries for each virtual register + // that is currently available in a physical register. + LiveRegMap LiveVirtRegs; + + DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap; + + // RegState - Track the state of a physical register. + enum RegState { + // A disabled register is not available for allocation, but an alias may + // be in use. A register can only be moved out of the disabled state if + // all aliases are disabled. + regDisabled, + + // A free register is not currently in use and can be allocated + // immediately without checking aliases. + regFree, + + // A reserved register has been assigned explicitly (e.g., setting up a + // call parameter), and it remains reserved until it is used. + regReserved + + // A register state may also be a virtual register number, indication that + // the physical register is currently allocated to a virtual register. In + // that case, LiveVirtRegs contains the inverse mapping. + }; + + // PhysRegState - One of the RegState enums, or a virtreg. + std::vector<unsigned> PhysRegState; + + // Set of register units. + typedef SparseSet<unsigned> UsedInInstrSet; + + // Set of register units that are used in the current instruction, and so + // cannot be allocated. + UsedInInstrSet UsedInInstr; + + // Mark a physreg as used in this instruction. + void markRegUsedInInstr(unsigned PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.insert(*Units); + } + + // Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(unsigned PhysReg) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (UsedInInstr.count(*Units)) + return true; + return false; + } + + // SkippedInstrs - Descriptors of instructions whose clobber list was + // ignored because all registers were spilled. It is still necessary to + // mark all the clobbered registers as used by the function. + SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs; + + // isBulkSpilling - This flag is set when LiveRegMap will be cleared + // completely after spilling all live registers. LiveRegMap entries should + // not be erased. 
+ bool isBulkSpilling; + + enum : unsigned { + spillClean = 1, + spillDirty = 100, + spillImpossible = ~0u + }; + public: + const char *getPassName() const override { + return "Fast Register Allocator"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + private: + bool runOnMachineFunction(MachineFunction &Fn) override; + void AllocateBasicBlock(); + void handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead); + int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); + bool isLastUseOfLocalReg(MachineOperand&); + + void addKillFlag(const LiveReg&); + void killVirtReg(LiveRegMap::iterator); + void killVirtReg(unsigned VirtReg); + void spillVirtReg(MachineBasicBlock::iterator MI, LiveRegMap::iterator); + void spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg); + + void usePhysReg(MachineOperand&); + void definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState); + unsigned calcSpillCost(unsigned PhysReg) const; + void assignVirtToPhysReg(LiveReg&, unsigned PhysReg); + LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const { + return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg)); + } + LiveRegMap::iterator assignVirtToPhysReg(unsigned VReg, unsigned PhysReg); + LiveRegMap::iterator allocVirtReg(MachineInstr *MI, LiveRegMap::iterator, + unsigned Hint); + LiveRegMap::iterator defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + LiveRegMap::iterator reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint); + void spillAll(MachineBasicBlock::iterator MI); + bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); + }; + char RAFast::ID = 0; +} + +/// getStackSpaceFor - This allocates space for the specified virtual register +/// to be held on the stack. +int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { + // Find the location Reg would belong... + int SS = StackSlotForVirtReg[VirtReg]; + if (SS != -1) + return SS; // Already has space allocated? + + // Allocate a new stack object for this spill location... + int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + + // Assign the slot. + StackSlotForVirtReg[VirtReg] = FrameIdx; + return FrameIdx; +} + +/// isLastUseOfLocalReg - Return true if MO is the only remaining reference to +/// its virtual register, and it is guaranteed to be a block-local register. +/// +bool RAFast::isLastUseOfLocalReg(MachineOperand &MO) { + // If the register has ever been spilled or reloaded, we conservatively assume + // it is a global register used in multiple blocks. + if (StackSlotForVirtReg[MO.getReg()] != -1) + return false; + + // Check that the use/def chain has exactly one operand - MO. + MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); + if (&*I != &MO) + return false; + return ++I == MRI->reg_nodbg_end(); +} + +/// addKillFlag - Set kill flags on last use of a virtual register. 
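+/// For instance (schematic, not from the source): if the last recorded use
+/// reads exactly LR.PhysReg, that operand simply gets its kill flag set;
+/// if the use referred to a different register (e.g. a sub-register of
+/// PhysReg), an implicit kill of LR.PhysReg is added to the instruction
+/// instead.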
+void RAFast::addKillFlag(const LiveReg &LR) { + if (!LR.LastUse) return; + MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); + if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { + if (MO.getReg() == LR.PhysReg) + MO.setIsKill(); + else + LR.LastUse->addRegisterKilled(LR.PhysReg, TRI, true); + } +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(LiveRegMap::iterator LRI) { + addKillFlag(*LRI); + assert(PhysRegState[LRI->PhysReg] == LRI->VirtReg && + "Broken RegState mapping"); + PhysRegState[LRI->PhysReg] = regFree; + // Erase from LiveVirtRegs unless we're spilling in bulk. + if (!isBulkSpilling) + LiveVirtRegs.erase(LRI); +} + +/// killVirtReg - Mark virtreg as no longer available. +void RAFast::killVirtReg(unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "killVirtReg needs a virtual register"); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) + killVirtReg(LRI); +} + +/// spillVirtReg - This method spills the value specified by VirtReg into the +/// corresponding stack slot if needed. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, unsigned VirtReg) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Spilling a physical register is illegal!"); + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "Spilling unmapped virtual register"); + spillVirtReg(MI, LRI); +} + +/// spillVirtReg - Do the actual work of spilling. +void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, + LiveRegMap::iterator LRI) { + LiveReg &LR = *LRI; + assert(PhysRegState[LR.PhysReg] == LRI->VirtReg && "Broken RegState mapping"); + + if (LR.Dirty) { + // If this physreg is used by the instruction, we want to kill it on the + // instruction, not on the spill. + bool SpillKill = LR.LastUse != MI; + LR.Dirty = false; + DEBUG(dbgs() << "Spilling " << PrintReg(LRI->VirtReg, TRI) + << " in " << PrintReg(LR.PhysReg, TRI)); + const TargetRegisterClass *RC = MRI->getRegClass(LRI->VirtReg); + int FI = getStackSpaceFor(LRI->VirtReg, RC); + DEBUG(dbgs() << " to stack slot #" << FI << "\n"); + TII->storeRegToStackSlot(*MBB, MI, LR.PhysReg, SpillKill, FI, RC, TRI); + ++NumStores; // Update statistics + + // If this register is used by DBG_VALUE then insert new DBG_VALUE to + // identify spilled location as the place to find corresponding variable's + // value. + SmallVectorImpl<MachineInstr *> &LRIDbgValues = + LiveDbgValueMap[LRI->VirtReg]; + for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { + MachineInstr *DBG = LRIDbgValues[li]; + const MDNode *Var = DBG->getDebugVariable(); + const MDNode *Expr = DBG->getDebugExpression(); + bool IsIndirect = DBG->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; + DebugLoc DL = DBG->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + MachineInstr *NewDV = + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(FI) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + assert(NewDV->getParent() == MBB && "dangling parent pointer"); + (void)NewDV; + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); + } + // Now this register is spilled there is should not be any DBG_VALUE + // pointing to this register because they are all pointing to spilled value + // now. 
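+ // (Roughly: a DBG_VALUE that described the variable as living in
+ // LR.PhysReg is complemented by a new DBG_VALUE at the spill point that
+ // points at frame index FI, so debug info keeps tracking the variable once
+ // the register is reused.)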
+ LRIDbgValues.clear(); + if (SpillKill) + LR.LastUse = nullptr; // Don't kill register again + } + killVirtReg(LRI); +} + +/// spillAll - Spill all dirty virtregs without killing them. +void RAFast::spillAll(MachineBasicBlock::iterator MI) { + if (LiveVirtRegs.empty()) return; + isBulkSpilling = true; + // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order + // of spilling here is deterministic, if arbitrary. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), e = LiveVirtRegs.end(); + i != e; ++i) + spillVirtReg(MI, i); + LiveVirtRegs.clear(); + isBulkSpilling = false; +} + +/// usePhysReg - Handle the direct use of a physical register. +/// Check that the register is not used by a virtreg. +/// Kill the physreg, marking it free. +/// This may add implicit kills to MO->getParent() and invalidate MO. +void RAFast::usePhysReg(MachineOperand &MO) { + unsigned PhysReg = MO.getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Bad usePhysReg operand"); + markRegUsedInInstr(PhysReg); + switch (PhysRegState[PhysReg]) { + case regDisabled: + break; + case regReserved: + PhysRegState[PhysReg] = regFree; + // Fall through + case regFree: + MO.setIsKill(); + return; + default: + // The physreg was allocated to a virtual register. That means the value we + // wanted has been clobbered. + llvm_unreachable("Instruction uses an allocated register"); + } + + // Maybe a superregister is reserved? + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; + switch (PhysRegState[Alias]) { + case regDisabled: + break; + case regReserved: + // Either PhysReg is a subregister of Alias and we mark the + // whole register as free, or PhysReg is the superregister of + // Alias and we mark all the aliases as disabled before freeing + // PhysReg. + // In the latter case, since PhysReg was disabled, this means that + // its value is defined only by physical sub-registers. This check + // is performed by the assert of the default case in this loop. + // Note: The value of the superregister may only be partial + // defined, that is why regDisabled is a valid state for aliases. + assert((TRI->isSuperRegister(PhysReg, Alias) || + TRI->isSuperRegister(Alias, PhysReg)) && + "Instruction is not using a subregister of a reserved register"); + // Fall through. + case regFree: + if (TRI->isSuperRegister(PhysReg, Alias)) { + // Leave the superregister in the working set. + PhysRegState[Alias] = regFree; + MO.getParent()->addRegisterKilled(Alias, TRI, true); + return; + } + // Some other alias was in the working set - clear it. + PhysRegState[Alias] = regDisabled; + break; + default: + llvm_unreachable("Instruction uses an alias of an allocated register"); + } + } + + // All aliases are disabled, bring register into working set. + PhysRegState[PhysReg] = regFree; + MO.setIsKill(); +} + +/// definePhysReg - Mark PhysReg as reserved or free after spilling any +/// virtregs. This is very similar to defineVirtReg except the physreg is +/// reserved instead of allocated. +void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, + RegState NewState) { + markRegUsedInInstr(PhysReg); + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. + case regFree: + case regReserved: + PhysRegState[PhysReg] = NewState; + return; + } + + // This is a disabled register, disable all aliases. 
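+ // (e.g. with x86-style register nesting, defining %EAX also has to evict
+ // whatever lives in %AX, %AL or %AH: any virtreg found in an alias is
+ // spilled and the alias is marked regDisabled.)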
+ PhysRegState[PhysReg] = NewState; + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; + default: + spillVirtReg(MI, VirtReg); + // Fall through. + case regFree: + case regReserved: + PhysRegState[Alias] = regDisabled; + if (TRI->isSuperRegister(PhysReg, Alias)) + return; + break; + } + } +} + + +// calcSpillCost - Return the cost of spilling clearing out PhysReg and +// aliases so it is free for allocation. +// Returns 0 when PhysReg is free or disabled with all aliases disabled - it +// can be allocated directly. +// Returns spillImpossible when PhysReg or an alias can't be spilled. +unsigned RAFast::calcSpillCost(unsigned PhysReg) const { + if (isRegUsedInInstr(PhysReg)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); + return spillImpossible; + } + switch (unsigned VirtReg = PhysRegState[PhysReg]) { + case regDisabled: + break; + case regFree: + return 0; + case regReserved: + DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding " + << PrintReg(PhysReg, TRI) << " is reserved already.\n"); + return spillImpossible; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + return I->Dirty ? spillDirty : spillClean; + } + } + + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n"); + unsigned Cost = 0; + for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { + unsigned Alias = *AI; + switch (unsigned VirtReg = PhysRegState[Alias]) { + case regDisabled: + break; + case regFree: + ++Cost; + break; + case regReserved: + return spillImpossible; + default: { + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + Cost += I->Dirty ? spillDirty : spillClean; + break; + } + } + } + return Cost; +} + + +/// assignVirtToPhysReg - This method updates local state so that we know +/// that PhysReg is the proper container for VirtReg now. The physical +/// register must not be used for anything else when this is called. +/// +void RAFast::assignVirtToPhysReg(LiveReg &LR, unsigned PhysReg) { + DEBUG(dbgs() << "Assigning " << PrintReg(LR.VirtReg, TRI) << " to " + << PrintReg(PhysReg, TRI) << "\n"); + PhysRegState[PhysReg] = LR.VirtReg; + assert(!LR.PhysReg && "Already assigned a physreg"); + LR.PhysReg = PhysReg; +} + +RAFast::LiveRegMap::iterator +RAFast::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "VirtReg disappeared"); + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; +} + +/// allocVirtReg - Allocate a physical register for VirtReg. +RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, + LiveRegMap::iterator LRI, + unsigned Hint) { + const unsigned VirtReg = LRI->VirtReg; + + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Can only allocate virtual registers"); + + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + + // Ignore invalid hints. + if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || + !RC->contains(Hint) || !MRI->isAllocatable(Hint))) + Hint = 0; + + // Take hint when possible. + if (Hint) { + // Ignore the hint if we would have to spill a dirty register. 
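+ // With the class constants above (spillClean = 1, spillDirty = 100), the
+ // hint is taken only when clearing it at worst drops clean values: e.g. a
+ // hint holding a single clean virtreg costs 1 and is accepted, while one
+ // holding a dirty virtreg costs 100 and is skipped.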
+ unsigned Cost = calcSpillCost(Hint); + if (Cost < spillDirty) { + if (Cost) + definePhysReg(MI, Hint, regFree); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, Hint); + } + } + + ArrayRef<MCPhysReg> AO = RegClassInfo.getOrder(RC); + + // First try to find a completely free register. + for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ + unsigned PhysReg = *I; + if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { + assignVirtToPhysReg(*LRI, PhysReg); + return LRI; + } + } + + DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from " + << TRI->getRegClassName(RC) << "\n"); + + unsigned BestReg = 0, BestCost = spillImpossible; + for (ArrayRef<MCPhysReg>::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ + unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); + // Cost is 0 when all aliases are already disabled. + if (Cost == 0) { + assignVirtToPhysReg(*LRI, *I); + return LRI; + } + if (Cost < BestCost) + BestReg = *I, BestCost = Cost; + } + + if (BestReg) { + definePhysReg(MI, BestReg, regFree); + // definePhysReg may kill virtual registers and modify LiveVirtRegs. + // That invalidates LRI, so run a new lookup for VirtReg. + return assignVirtToPhysReg(VirtReg, BestReg); + } + + // Nothing we can do. Report an error and keep going with a bad allocation. + if (MI->isInlineAsm()) + MI->emitError("inline assembly requires more registers than available"); + else + MI->emitError("ran out of registers during register allocation"); + definePhysReg(MI, *AO.begin(), regFree); + return assignVirtToPhysReg(VirtReg, *AO.begin()); +} + +/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty. +RAFast::LiveRegMap::iterator +RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + if (New) { + // If there is no hint, peek at the only use of this register. + if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && + MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); + // It's a copy, use the destination register as a hint. + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + } + LRI = allocVirtReg(MI, LRI, Hint); + } else if (LRI->LastUse) { + // Redefining a live register - kill at the last use, unless it is this + // instruction defining VirtReg multiple times. + if (LRI->LastUse != MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) + addKillFlag(*LRI); + } + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + LRI->Dirty = true; + markRegUsedInInstr(LRI->PhysReg); + return LRI; +} + +/// reloadVirtReg - Make sure VirtReg is available in a physreg and return it. 
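+/// If VirtReg has no live mapping, a physreg is allocated and a reload from
+/// the virtreg's stack slot is inserted in front of MI; otherwise the kill and
+/// dead flags on the operand are fixed up so a later use does not trigger a
+/// second reload.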
+RAFast::LiveRegMap::iterator +RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, + unsigned VirtReg, unsigned Hint) { + assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && + "Not a virtual register"); + LiveRegMap::iterator LRI; + bool New; + std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); + MachineOperand &MO = MI->getOperand(OpNum); + if (New) { + LRI = allocVirtReg(MI, LRI, Hint); + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); + int FrameIndex = getStackSpaceFor(VirtReg, RC); + DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into " + << PrintReg(LRI->PhysReg, TRI) << "\n"); + TII->loadRegFromStackSlot(*MBB, MI, LRI->PhysReg, FrameIndex, RC, TRI); + ++NumLoads; + } else if (LRI->Dirty) { + if (isLastUseOfLocalReg(MO)) { + DEBUG(dbgs() << "Killing last use: " << MO << "\n"); + if (MO.isUse()) + MO.setIsKill(); + else + MO.setIsDead(); + } else if (MO.isKill()) { + DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); + MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + MO.setIsDead(false); + } + } else if (MO.isKill()) { + // We must remove kill flags from uses of reloaded registers because the + // register would be killed immediately, and there might be a second use: + // %foo = OR %x<kill>, %x + // This would cause a second reload of %x into a different register. + DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); + MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + MO.setIsDead(false); + } + assert(LRI->PhysReg && "Register not assigned"); + LRI->LastUse = MI; + LRI->LastOpNum = OpNum; + markRegUsedInInstr(LRI->PhysReg); + return LRI; +} + +// setPhysReg - Change operand OpNum in MI the refer the PhysReg, considering +// subregs. This may invalidate any operand pointers. +// Return true if the operand kills its register. +bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { + MachineOperand &MO = MI->getOperand(OpNum); + bool Dead = MO.isDead(); + if (!MO.getSubReg()) { + MO.setReg(PhysReg); + return MO.isKill() || Dead; + } + + // Handle subregister index. + MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0); + MO.setSubReg(0); + + // A kill flag implies killing the full register. Add corresponding super + // register kill. + if (MO.isKill()) { + MI->addRegisterKilled(PhysReg, TRI, true); + return true; + } + + // A <def,read-undef> of a sub-register requires an implicit def of the full + // register. + if (MO.isDef() && MO.isUndef()) + MI->addRegisterDefined(PhysReg, TRI); + + return Dead; +} + +// Handle special instruction operand like early clobbers and tied ops when +// there are additional physreg defines. +void RAFast::handleThroughOperands(MachineInstr *MI, + SmallVectorImpl<unsigned> &VirtDead) { + DEBUG(dbgs() << "Scanning for through registers:"); + SmallSet<unsigned, 8> ThroughRegs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || + (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (ThroughRegs.insert(Reg).second) + DEBUG(dbgs() << ' ' << PrintReg(Reg)); + } + } + + // If any physreg defines collide with preallocated through registers, + // we must spill and reallocate. 
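+ // (e.g. an inline asm with a "+r" operand whose virtreg currently sits in
+ // %EAX while the asm also clobbers %EAX explicitly: the virtreg is spilled
+ // out of the way here so it can be reallocated to a register that survives
+ // the instruction.)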
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + markRegUsedInInstr(Reg); + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (ThroughRegs.count(PhysRegState[*AI])) + definePhysReg(MI, *AI, regFree); + } + } + + SmallVector<unsigned, 8> PartialDefs; + DEBUG(dbgs() << "Allocating tied uses.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + unsigned DefIdx = 0; + if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; + DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " + << DefIdx << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->PhysReg; + setPhysReg(MI, i, PhysReg); + // Note: we don't update the def operand yet. That would cause the normal + // def-scan to attempt spilling. + } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + // Reload the register, but don't assign to the operand just yet. + // That would confuse the later phys-def processing pass. + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + PartialDefs.push_back(LRI->PhysReg); + } + } + + DEBUG(dbgs() << "Allocating early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (!MO.isEarlyClobber()) + continue; + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + + // Restore UsedInInstr to a state usable for allocating normal virtual uses. + UsedInInstr.clear(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) + << " as used in instr\n"); + markRegUsedInInstr(Reg); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + markRegUsedInInstr(PartialDefs[i]); +} + +void RAFast::AllocateBasicBlock() { + DEBUG(dbgs() << "\nAllocating " << *MBB); + + PhysRegState.assign(TRI->getNumRegs(), regDisabled); + assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); + + MachineBasicBlock::iterator MII = MBB->begin(); + + // Add live-in registers as live. + for (const auto &LI : MBB->liveins()) + if (MRI->isAllocatable(LI.PhysReg)) + definePhysReg(MII, LI.PhysReg, regReserved); + + SmallVector<unsigned, 8> VirtDead; + SmallVector<MachineInstr*, 32> Coalesced; + + // Otherwise, sequentially allocate each instruction in the MBB. 
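+ // Each instruction is handled in a few passes over its operands: a first
+ // scan marks physreg uses and early clobbers and finds the virtual
+ // operands, handleThroughOperands() deals with inline asm, tied and
+ // early-clobber constraints, a second scan reloads and assigns virtreg
+ // uses, and a third scan allocates the defs (with everything spilled first
+ // at calls).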
+ while (MII != MBB->end()) { + MachineInstr *MI = MII++; + const MCInstrDesc &MCID = MI->getDesc(); + DEBUG({ + dbgs() << "\n>> " << *MI << "Regs:"; + for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { + if (PhysRegState[Reg] == regDisabled) continue; + dbgs() << " " << TRI->getName(Reg); + switch(PhysRegState[Reg]) { + case regFree: + break; + case regReserved: + dbgs() << "*"; + break; + default: { + dbgs() << '=' << PrintReg(PhysRegState[Reg]); + LiveRegMap::iterator I = findLiveVirtReg(PhysRegState[Reg]); + assert(I != LiveVirtRegs.end() && "Missing VirtReg entry"); + if (I->Dirty) + dbgs() << "*"; + assert(I->PhysReg == Reg && "Bad inverse map"); + break; + } + } + } + dbgs() << '\n'; + // Check that LiveVirtRegs is the inverse. + for (LiveRegMap::iterator i = LiveVirtRegs.begin(), + e = LiveVirtRegs.end(); i != e; ++i) { + assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) && + "Bad map key"); + assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) && + "Bad map value"); + assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + } + }); + + // Debug values are not allowed to change codegen in any way. + if (MI->isDebugValue()) { + bool ScanDbgValue = true; + while (ScanDbgValue) { + ScanDbgValue = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + LiveRegMap::iterator LRI = findLiveVirtReg(Reg); + if (LRI != LiveVirtRegs.end()) + setPhysReg(MI, i, LRI->PhysReg); + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) { + // We can't allocate a physreg for a DebugValue, sorry! + DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); + MO.setReg(0); + } + else { + // Modify DBG_VALUE now that the value is in a spill slot. + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + const MDNode *Var = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock *MBB = MI->getParent(); + assert( + cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, + TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SS) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; + } + } + LiveDbgValueMap[Reg].push_back(MI); + } + } + // Next instruction. + continue; + } + + // If this is a copy, we may be able to coalesce. + unsigned CopySrc = 0, CopyDst = 0, CopySrcSub = 0, CopyDstSub = 0; + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } + + // Track registers used by instruction. + UsedInInstr.clear(); + + // First scan. + // Mark physreg uses and early clobbers as used. + // Find the end of the virtreg operands + unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + // Make sure MRI knows about registers clobbered by regmasks. 
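+ // (Calls, for instance, carry a <regmask> operand describing which
+ // registers survive the call; everything outside the mask is treated as
+ // clobbered, and recording those clobbers in MRI keeps the function's
+ // save/restore bookkeeping correct even though the registers never appear
+ // as explicit operands.)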
+ if (MO.isRegMask()) { + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + continue; + } + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } + continue; + } + if (!MRI->isAllocatable(Reg)) continue; + if (MO.isUse()) { + usePhysReg(MO); + } else if (MO.isEarlyClobber()) { + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // Similarly, if there are multiple defs and tied operands, we must make + // sure the same register is allocated to uses and defs. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; + // Pretend we have early clobbers so the use operands get marked below. + // This is not necessary for the common case of a single tied use. + hasEarlyClobbers = true; + } + + // Second scan. + // Allocate virtreg uses. + for (unsigned i = 0; i != VirtOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; + if (MO.isUse()) { + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst); + unsigned PhysReg = LRI->PhysReg; + CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; + if (setPhysReg(MI, i, PhysReg)) + killVirtReg(LRI); + } + } + + // Track registers defined by instruction - early clobbers and tied uses at + // this point. + UsedInInstr.clear(); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + // Look for physreg defs and tied uses. + if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; + markRegUsedInInstr(Reg); + } + } + + unsigned DefOpEnd = MI->getNumOperands(); + if (MI->isCall()) { + // Spill all virtregs before a call. This serves two purposes: 1. If an + // exception is thrown, the landing pad is going to expect to find + // registers in their spill slots, and 2. we don't have to wade through + // all the <imp-def> operands on the call instruction. + DefOpEnd = VirtOpEnd; + DEBUG(dbgs() << " Spilling remaining registers before call.\n"); + spillAll(MI); + + // The imp-defs are skipped below, but we still need to mark those + // registers as used by the function. + SkippedInstrs.insert(&MCID); + } + + // Third scan. + // Allocate defs and collect dead defs. 
+ for (unsigned i = 0; i != DefOpEnd; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; + unsigned Reg = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (!MRI->isAllocatable(Reg)) continue; + definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); + continue; + } + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, CopySrc); + unsigned PhysReg = LRI->PhysReg; + if (setPhysReg(MI, i, PhysReg)) { + VirtDead.push_back(Reg); + CopyDst = 0; // cancel coalescing; + } else + CopyDst = (CopyDst == Reg || CopyDst == PhysReg) ? PhysReg : 0; + } + + // Kill dead defs after the scan to ensure that multiple defs of the same + // register are allocated identically. We didn't need to do this for uses + // because we are crerating our own kill flags, and they are always at the + // last use. + for (unsigned i = 0, e = VirtDead.size(); i != e; ++i) + killVirtReg(VirtDead[i]); + VirtDead.clear(); + + if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { + DEBUG(dbgs() << "-- coalescing: " << *MI); + Coalesced.push_back(MI); + } else { + DEBUG(dbgs() << "<< " << *MI); + } + } + + // Spill all physical registers holding virtual registers now. + DEBUG(dbgs() << "Spilling live registers at end of block.\n"); + spillAll(MBB->getFirstTerminator()); + + // Erase all the coalesced copies. We are delaying it until now because + // LiveVirtRegs might refer to the instrs. + for (unsigned i = 0, e = Coalesced.size(); i != e; ++i) + MBB->erase(Coalesced[i]); + NumCopies += Coalesced.size(); + + DEBUG(MBB->dump()); +} + +/// runOnMachineFunction - Register allocate the whole function +/// +bool RAFast::runOnMachineFunction(MachineFunction &Fn) { + DEBUG(dbgs() << "********** FAST REGISTER ALLOCATION **********\n" + << "********** Function: " << Fn.getName() << '\n'); + MF = &Fn; + MRI = &MF->getRegInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + MRI->freezeReservedRegs(Fn); + RegClassInfo.runOnMachineFunction(Fn); + UsedInInstr.clear(); + UsedInInstr.setUniverse(TRI->getNumRegUnits()); + + assert(!MRI->isSSA() && "regalloc requires leaving SSA"); + + // initialize the virtual->physical register map to have a 'null' + // mapping for all virtual registers + StackSlotForVirtReg.resize(MRI->getNumVirtRegs()); + LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); + + // Loop over all of the basic blocks, eliminating virtual register references + for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end(); + MBBi != MBBe; ++MBBi) { + MBB = &*MBBi; + AllocateBasicBlock(); + } + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers. + MRI->clearVirtRegs(); + + SkippedInstrs.clear(); + StackSlotForVirtReg.clear(); + LiveDbgValueMap.clear(); + return true; +} + +FunctionPass *llvm::createFastRegisterAllocator() { + return new RAFast(); +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp new file mode 100644 index 0000000..945cb9e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -0,0 +1,2606 @@ +//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the RAGreedy function pass for register allocation in +// optimized builds. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "AllocationOrder.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" +#include "RegAllocBase.h" +#include "SpillPlacement.h" +#include "Spiller.h" +#include "SplitKit.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumGlobalSplits, "Number of split global live ranges"); +STATISTIC(NumLocalSplits, "Number of split local live ranges"); +STATISTIC(NumEvicted, "Number of interferences evicted"); + +static cl::opt<SplitEditor::ComplementSpillMode> +SplitSpillMode("split-spill-mode", cl::Hidden, + cl::desc("Spill mode for splitting live ranges"), + cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), + clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), + clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), + clEnumValEnd), + cl::init(SplitEditor::SM_Partition)); + +static cl::opt<unsigned> +LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, + cl::desc("Last chance recoloring max depth"), + cl::init(5)); + +static cl::opt<unsigned> LastChanceRecoloringMaxInterference( + "lcr-max-interf", cl::Hidden, + cl::desc("Last chance recoloring maximum number of considered" + " interference at a time"), + cl::init(8)); + +static cl::opt<bool> +ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, + cl::desc("Exhaustive Search for registers bypassing the depth " + "and interference cutoffs of last chance recoloring")); + +static cl::opt<bool> EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + +static cl::opt<bool> EnableDeferredSpilling( + "enable-deferred-spilling", cl::Hidden, + cl::desc("Instead of spilling a variable right away, defer the actual " + "code insertion to the end of the allocation. That way the " + "allocator might still find a suitable coloring for this " + "variable because of other evicted variables."), + cl::init(false)); + +// FIXME: Find a good default for this flag and remove the flag. 
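+// (This is a hidden llc/opt option; a nonzero value, e.g.
+// -regalloc-csr-first-time-cost=5 with a purely illustrative "5", penalizes
+// the first use of a callee-saved register in a function.)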
+static cl::opt<unsigned> +CSRFirstTimeCost("regalloc-csr-first-time-cost", + cl::desc("Cost for first time use of callee-saved register."), + cl::init(0), cl::Hidden); + +static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", + createGreedyRegisterAllocator); + +namespace { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // Convenient shortcuts. + typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; + typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; + typedef SmallSet<unsigned, 16> SmallVirtRegSet; + + // context + MachineFunction *MF; + + // Shortcuts to some useful interface. + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + RegisterClassInfo RCI; + + // analyses + SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; + MachineDominatorTree *DomTree; + MachineLoopInfo *Loops; + EdgeBundles *Bundles; + SpillPlacement *SpillPlacer; + LiveDebugVariables *DebugVars; + + // state + std::unique_ptr<Spiller> SpillerInstance; + PQueue Queue; + unsigned NextCascade; + + // Live ranges pass through a number of stages as we try to allocate them. + // Some of the stages may also create new live ranges: + // + // - Region splitting. + // - Per-block splitting. + // - Local splitting. + // - Spilling. + // + // Ranges produced by one of the stages skip the previous stages when they are + // dequeued. This improves performance because we can skip interference checks + // that are unlikely to give any results. It also guarantees that the live + // range splitting algorithm terminates, something that is otherwise hard to + // ensure. + enum LiveRangeStage { + /// Newly created live range that has never been queued. + RS_New, + + /// Only attempt assignment and eviction. Then requeue as RS_Split. + RS_Assign, + + /// Attempt live range splitting if assignment is impossible. + RS_Split, + + /// Attempt more aggressive live range splitting that is guaranteed to make + /// progress. This is used for split products that may not be making + /// progress. + RS_Split2, + + /// Live range will be spilled. No more splitting will be attempted. + RS_Spill, + + + /// Live range is in memory. Because of other evictions, it might get moved + /// in a register in the end. + RS_Memory, + + /// There is nothing more we can do to this live range. Abort compilation + /// if it can't be assigned. + RS_Done + }; + + // Enum CutOffStage to keep a track whether the register allocation failed + // because of the cutoffs encountered in last chance recoloring. + // Note: This is used as bitmask. New value should be next power of 2. + enum CutOffStage { + // No cutoffs encountered + CO_None = 0, + + // lcr-max-depth cutoff encountered + CO_Depth = 1, + + // lcr-max-interf cutoff encountered + CO_Interf = 2 + }; + + uint8_t CutOffInfo; + +#ifndef NDEBUG + static const char *const StageName[]; +#endif + + // RegInfo - Keep additional information about each live range. + struct RegInfo { + LiveRangeStage Stage; + + // Cascade - Eviction loop prevention. See canEvictInterference(). 
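+ // (A live range that evicts others hands its cascade number to every
+ // evicted range, and a range carrying cascade N can afterwards only be
+ // evicted by a range with a newer cascade, or by an "urgent" eviction that
+ // pays a large BrokenHints penalty, so eviction chains cannot cycle
+ // forever.)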
+ unsigned Cascade; + + RegInfo() : Stage(RS_New), Cascade(0) {} + }; + + IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return ExtraRegInfo[VirtReg.reg].Stage; + } + + void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + ExtraRegInfo[VirtReg.reg].Stage = Stage; + } + + template<typename Iterator> + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + for (;Begin != End; ++Begin) { + unsigned Reg = *Begin; + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = NewStage; + } + } + + /// Cost of evicting interference. + struct EvictionCost { + unsigned BrokenHints; ///< Total number of broken hints. + float MaxWeight; ///< Maximum spill weight evicted. + + EvictionCost(): BrokenHints(0), MaxWeight(0) {} + + bool isMax() const { return BrokenHints == ~0u; } + + void setMax() { BrokenHints = ~0u; } + + void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } + + bool operator<(const EvictionCost &O) const { + return std::tie(BrokenHints, MaxWeight) < + std::tie(O.BrokenHints, O.MaxWeight); + } + }; + + // splitting state. + std::unique_ptr<SplitAnalysis> SA; + std::unique_ptr<SplitEditor> SE; + + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. + SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + // Register intended for assignment, or 0. + unsigned PhysReg; + + // SplitKit interval index for this candidate. + unsigned IntvIdx; + + // Interference for PhysReg. + InterferenceCache::Cursor Intf; + + // Bundles where this candidate should be live. + BitVector LiveBundles; + SmallVector<unsigned, 8> ActiveBlocks; + + void reset(InterferenceCache &Cache, unsigned Reg) { + PhysReg = Reg; + IntvIdx = 0; + Intf.setPhysReg(Cache, Reg); + LiveBundles.clear(); + ActiveBlocks.clear(); + } + + // Set B[i] = C for every live bundle where B[i] was NoCand. + unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { + unsigned Count = 0; + for (int i = LiveBundles.find_first(); i >= 0; + i = LiveBundles.find_next(i)) + if (B[i] == NoCand) { + B[i] = C; + Count++; + } + return Count; + } + }; + + /// Candidate info for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector<GlobalSplitCandidate, 32> GlobalCand; + + enum : unsigned { NoCand = ~0u }; + + /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to + /// NoCand which indicates the stack interval. + SmallVector<unsigned, 32> BundleCand; + + /// Callee-save register cost, calculated once per machine function. + BlockFrequency CSRCost; + + /// Run or not the local reassignment heuristic. This information is + /// obtained from the TargetSubtargetInfo. + bool EnableLocalReassign; + + /// Set of broken hints that may be reconciled later because of eviction. + SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; + +public: + RAGreedy(); + + /// Return the pass name. + const char* getPassName() const override { + return "Greedy Register Allocator"; + } + + /// RAGreedy analysis usage. 
+ void getAnalysisUsage(AnalysisUsage &AU) const override; + void releaseMemory() override; + Spiller &spiller() override { return *SpillerInstance; } + void enqueue(LiveInterval *LI) override; + LiveInterval *dequeue() override; + unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; + void aboutToRemoveInterval(LiveInterval &) override; + + /// Perform register allocation. + bool runOnMachineFunction(MachineFunction &mf) override; + + static char ID; + +private: + unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned = 0); + + bool LRE_CanEraseVirtReg(unsigned) override; + void LRE_WillShrinkVirtReg(unsigned) override; + void LRE_DidCloneVirtReg(unsigned, unsigned) override; + void enqueue(PQueue &CurQueue, LiveInterval *LI); + LiveInterval *dequeue(PQueue &CurQueue); + + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); + void growRegion(GlobalSplitCandidate &Cand); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); + bool calcCompactRegion(GlobalSplitCandidate&); + void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); + void calcGapWeights(unsigned, SmallVectorImpl<float>&); + unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); + bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); + bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); + void evictInterference(LiveInterval&, unsigned, + SmallVectorImpl<unsigned>&); + bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters); + + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + unsigned tryEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&, unsigned = ~0u); + unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + /// Calculate cost of region splitting. + unsigned calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, bool IgnoreCSR); + /// Perform region splitting. + unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs); + /// Check other options before using a callee-saved register for the first + /// time. + unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, + unsigned PhysReg, unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs); + void initializeCSRCost(); + unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + unsigned trySplit(LiveInterval&, AllocationOrder&, + SmallVectorImpl<unsigned>&); + unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, + SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); + bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, + SmallVirtRegSet &, unsigned); + void tryHintRecoloring(LiveInterval &); + void tryHintsRecoloring(); + + /// Model the information carried by one end of a copy. + struct HintInfo { + /// The frequency of the copy. + BlockFrequency Freq; + /// The virtual register or physical register. 
+ unsigned Reg; + /// Its currently assigned register. + /// In case of a physical register Reg == PhysReg. + unsigned PhysReg; + HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) + : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} + }; + typedef SmallVector<HintInfo, 4> HintsInfo; + BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); + void collectHintInfo(unsigned, HintsInfo &); + + bool isUnusedCalleeSavedReg(unsigned PhysReg) const; +}; +} // end anonymous namespace + +char RAGreedy::ID = 0; + +#ifndef NDEBUG +const char *const RAGreedy::StageName[] = { + "RS_New", + "RS_Assign", + "RS_Split", + "RS_Split2", + "RS_Spill", + "RS_Memory", + "RS_Done" +}; +#endif + +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons. +const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 + + +FunctionPass* llvm::createGreedyRegisterAllocator() { + return new RAGreedy(); +} + +RAGreedy::RAGreedy(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); + initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); + initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); + initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); + initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); +} + +void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addPreserved<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addRequired<VirtRegMap>(); + AU.addPreserved<VirtRegMap>(); + AU.addRequired<LiveRegMatrix>(); + AU.addPreserved<LiveRegMatrix>(); + AU.addRequired<EdgeBundles>(); + AU.addRequired<SpillPlacement>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + + +//===----------------------------------------------------------------------===// +// LiveRangeEdit delegate methods +//===----------------------------------------------------------------------===// + +bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (VRM->hasPhys(VirtReg)) { + LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + aboutToRemoveInterval(LI); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { + if (!VRM->hasPhys(VirtReg)) + return; + + // Register is assigned, put it back on the queue for reassignment. 
+ LiveInterval &LI = LIS->getInterval(VirtReg); + Matrix->unassign(LI); + enqueue(&LI); +} + +void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // Cloning a register we haven't even heard about yet? Just ignore it. + if (!ExtraRegInfo.inBounds(Old)) + return; + + // LRE may clone a virtual register because dead code elimination causes it to + // be split into connected components. The new components are much smaller + // than the original, so they should get a new chance at being assigned. + // same stage as the parent. + ExtraRegInfo[Old].Stage = RS_Assign; + ExtraRegInfo.grow(New); + ExtraRegInfo[New] = ExtraRegInfo[Old]; +} + +void RAGreedy::releaseMemory() { + SpillerInstance.reset(); + ExtraRegInfo.clear(); + GlobalCand.clear(); +} + +void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } + +void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { + // Prioritize live ranges by size, assigning larger ranges first. + // The queue holds (size, reg) pairs. + const unsigned Size = LI->getSize(); + const unsigned Reg = LI->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only enqueue virtual registers"); + unsigned Prio; + + ExtraRegInfo.grow(Reg); + if (ExtraRegInfo[Reg].Stage == RS_New) + ExtraRegInfo[Reg].Stage = RS_Assign; + + if (ExtraRegInfo[Reg].Stage == RS_Split) { + // Unsplit ranges that couldn't be allocated immediately are deferred until + // everything else has been allocated. + Prio = Size; + } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { + // Memory operand should be considered last. + // Change the priority such that Memory operand are assigned in + // the reverse order that they came in. + // TODO: Make this a member variable and probably do something about hints. + static unsigned MemOp = 0; + Prio = MemOp++; + } else { + // Giant live ranges fall back to the global assignment heuristic, which + // prevents excessive spilling in pathological cases. + bool ReverseLocal = TRI->reverseLocalAssignment(); + const TargetRegisterClass &RC = *MRI->getRegClass(Reg); + bool ForceGlobal = !ReverseLocal && + (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); + + if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && + LIS->intervalIsInOneMBB(*LI)) { + // Allocate original local ranges in linear instruction order. Since they + // are singly defined, this produces optimal coloring in the absence of + // global interference and other constraints. + if (!ReverseLocal) + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + else { + // Allocating bottom up may allow many short LRGs to be assigned first + // to one of the cheap registers. This could be much faster for very + // large blocks on targets with many physical registers. + Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); + } + Prio |= RC.AllocationPriority << 24; + } else { + // Allocate global and split ranges in long->short order. Long ranges that + // don't fit should be spilled (or split) ASAP so they don't create + // interference. Mark a bit to prioritize global above local ranges. + Prio = (1u << 29) + Size; + } + // Mark a higher bit to prioritize global and local above RS_Split. + Prio |= (1u << 31); + + // Boost ranges that have a physical register hint. + if (VRM->hasKnownPreference(Reg)) + Prio |= (1u << 30); + } + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. 
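+ // (PQueue is a max-heap of (Prio, ~Reg) pairs compared lexicographically,
+ // so for equal Prio the smaller vreg number has the larger complement and
+ // pops first; dequeue() flips the bits back with ~CurQueue.top().second.)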
+ CurQueue.push(std::make_pair(Prio, ~Reg)); +} + +LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } + +LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { + if (CurQueue.empty()) + return nullptr; + LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); + CurQueue.pop(); + return LI; +} + + +//===----------------------------------------------------------------------===// +// Direct Assignment +//===----------------------------------------------------------------------===// + +/// tryAssign - Try to assign VirtReg to an available register. +unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs) { + Order.rewind(); + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!Matrix->checkInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint()) + return PhysReg; + + // PhysReg is available, but there may be a better choice. + + // If we missed a simple hint, try to cheaply evict interference from the + // preferred register. + if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) + if (Order.isHint(Hint)) { + DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); + EvictionCost MaxCost; + MaxCost.setBrokenHints(1); + if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { + evictInterference(VirtReg, Hint, NewVRegs); + return Hint; + } + } + + // Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); + + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; +} + + +//===----------------------------------------------------------------------===// +// Interference eviction +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + unsigned PhysReg; + while ((PhysReg = Order.next())) { + if (PhysReg == PrevReg) + continue; + + MCRegUnitIterator Units(PhysReg, TRI); + for (; Units.isValid(); ++Units) { + // Instantiate a "subquery", not to be confused with the Queries array. + LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); + if (subQ.checkInterference()) + break; + } + // If no units have interference, break out with the current PhysReg. + if (!Units.isValid()) + break; + } + if (PhysReg) + DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << '\n'); + return PhysReg; +} + +/// shouldEvict - determine if A should evict the assigned live range B. The +/// eviction policy defined by this function together with the allocation order +/// defined by enqueue() decides which registers ultimately end up being split +/// and spilled. +/// +/// Cascade numbers are used to prevent infinite loops if this function is a +/// cyclic relation. +/// +/// @param A The live range to be assigned. +/// @param IsHint True when A is about to be assigned to its preferred +/// register. +/// @param B The live range to be evicted. +/// @param BreaksHint True when B is already assigned to its preferred register. 
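+///
+/// In short, A evicts B either when A is being placed on its preferred
+/// register while B neither sits on its own preferred register nor is past the
+/// splitting stage, or when A's spill weight is strictly greater than B's.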
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, + LiveInterval &B, bool BreaksHint) { + bool CanSplit = getStage(B) < RS_Spill; + + // Be fairly aggressive about following hints as long as the evictee can be + // split. + if (CanSplit && IsHint && !BreaksHint) + return true; + + if (A.weight > B.weight) { + DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); + return true; + } + return false; +} + +/// canEvictInterference - Return true if all interferences between VirtReg and +/// PhysReg can be evicted. +/// +/// @param VirtReg Live range that is about to be assigned. +/// @param PhysReg Desired register for assignment. +/// @param IsHint True when PhysReg is VirtReg's preferred register. +/// @param MaxCost Only look for cheaper candidates and update with new cost +/// when returning true. +/// @returns True when interference can be evicted cheaper than MaxCost. +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, + bool IsHint, EvictionCost &MaxCost) { + // It is only possible to evict virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) + return false; + + bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never + // involved in an eviction before. If a cascade number was assigned, deny + // evicting anything with the same or a newer cascade number. This prevents + // infinite eviction loops. + // + // This works out so a register without a cascade number is allowed to evict + // anything, and it can be evicted by anything. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = NextCascade; + + EvictionCost Cost; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there is 10 or more interferences, chances are one is heavier. + if (Q.collectInterferingVRegs(10) >= 10) + return false; + + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && + "Only expecting virtual register interference from query"); + // Never evict spill products. They cannot split or spill. + if (getStage(*Intf) == RS_Done) + return false; + // Once a live range becomes small enough, it is urgent that we find a + // register for it. This is indicated by an infinite spill weight. These + // urgent live ranges get to evict almost anything. + // + // Also allow urgent evictions of unspillable ranges from a strictly + // larger allocation order. + bool Urgent = !VirtReg.isSpillable() && + (Intf->isSpillable() || + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < + RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); + // Only evict older cascades or live ranges without a cascade. + unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; + if (Cascade <= IntfCascade) { + if (!Urgent) + return false; + // We permit breaking cascades for urgent evictions. It should be the + // last resort, though, so make it really expensive. + Cost.BrokenHints += 10; + } + // Would this break a satisfied hint? + bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); + // Update eviction cost. + Cost.BrokenHints += BreaksHint; + Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); + // Abort if this would be too expensive. 
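+ // (EvictionCost compares BrokenHints before MaxWeight, so e.g. a candidate
+ // with {BrokenHints: 0, MaxWeight: 5.0} is always cheaper than one with
+ // {BrokenHints: 1, MaxWeight: 0.1}.)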
+ if (!(Cost < MaxCost)) + return false; + if (Urgent) + continue; + // Apply the eviction policy for non-urgent evictions. + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + return false; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { + return false; + } + } + } + MaxCost = Cost; + return true; +} + +/// evictInterference - Evict any interferring registers that prevent VirtReg +/// from being assigned to Physreg. This assumes that canEvictInterference +/// returned true. +void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, + SmallVectorImpl<unsigned> &NewVRegs) { + // Make sure that VirtReg has a cascade number, and assign that cascade + // number to every evicted register. These live ranges than then only be + // evicted by a newer cascade, preventing infinite loops. + unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; + if (!Cascade) + Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; + + DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) + << " interference: Cascade " << Cascade << '\n'); + + // Collect all interfering virtregs first. + SmallVector<LiveInterval*, 8> Intfs; + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + assert(Q.seenAllInterferences() && "Didn't check all interfererences."); + ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); + Intfs.append(IVR.begin(), IVR.end()); + } + + // Evict them second. This will invalidate the queries. + for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { + LiveInterval *Intf = Intfs[i]; + // The same VirtReg may be present in multiple RegUnits. Skip duplicates. + if (!VRM->hasPhys(Intf->reg)) + continue; + Matrix->unassign(*Intf); + assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || + VirtReg.isSpillable() < Intf->isSpillable()) && + "Cannot decrease cascade number, illegal eviction"); + ExtraRegInfo[Intf->reg].Cascade = Cascade; + ++NumEvicted; + NewVRegs.push_back(Intf->reg); + } +} + +/// Returns true if the given \p PhysReg is a callee saved register and has not +/// been used for allocation yet. +bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { + unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); + if (CSR == 0) + return false; + + return !Matrix->isPhysRegUsed(PhysReg); +} + +/// tryEvict - Try to evict all interferences for a physreg. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. +unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs, + unsigned CostPerUseLimit) { + NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); + + // Keep track of the cheapest interference seen so far. + EvictionCost BestCost; + BestCost.setMax(); + unsigned BestPhys = 0; + unsigned OrderLimit = Order.getOrder().size(); + + // When we are just looking for a reduced cost per use, don't break any + // hints, and only evict smaller spill weights. + if (CostPerUseLimit < ~0u) { + BestCost.BrokenHints = 0; + BestCost.MaxWeight = VirtReg.weight; + + // Check of any registers in RC are below CostPerUseLimit. 
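+    // In other words, with a finite CostPerUseLimit only the cheap prefix of
+    // the allocation order is ever considered. A simplified standalone model
+    // of that filtering (Order/CostPerUse are stand-ins, not the real queries):
+    //
+    //   size_t limitOrder(const std::vector<unsigned> &Order,
+    //                     const std::vector<unsigned> &CostPerUse,
+    //                     unsigned Limit) {
+    //     size_t N = Order.size();
+    //     while (N && CostPerUse[Order[N - 1]] >= Limit)
+    //       --N;                    // drop the uniformly expensive tail
+    //     return N;                 // only try Order[0 .. N)
+    //   }
+    //
+    // The code below gets the same cut point from
+    // RegClassInfo::getLastCostChange() instead of scanning, because the tail
+    // of the order shares a single cost value.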
+ const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + unsigned MinCost = RegClassInfo.getMinCost(RC); + if (MinCost >= CostPerUseLimit) { + DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost + << ", no cheaper registers to be found.\n"); + return 0; + } + + // It is normal for register classes to have a long tail of registers with + // the same cost. We don't need to look at them if they're too expensive. + if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + OrderLimit = RegClassInfo.getLastCostChange(RC); + DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + } + } + + Order.rewind(); + while (unsigned PhysReg = Order.next(OrderLimit)) { + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a callee-saved register in a function has cost 1. + // Don't start using a CSR when the CostPerUseLimit is low. + if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " + << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) + << '\n'); + continue; + } + + if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) + continue; + + // Best so far. + BestPhys = PhysReg; + + // Stop if the hint can be used. + if (Order.isHint()) + break; + } + + if (!BestPhys) + return 0; + + evictInterference(VirtReg, BestPhys, NewVRegs); + return BestPhys; +} + + +//===----------------------------------------------------------------------===// +// Region Splitting +//===----------------------------------------------------------------------===// + +/// addSplitConstraints - Fill out the SplitConstraints vector based on the +/// interference pattern in Physreg and its aliases. Add the constraints to +/// SpillPlacement and return the static cost of this split in Cost, assuming +/// that all preferences in SplitConstraints are met. +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + BlockFrequency &Cost) { + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + + // Reset interference dependent info. + SplitConstraints.resize(UseBlocks.size()); + BlockFrequency StaticCost = 0; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + + BC.Number = BI.MBB->getNumber(); + Intf.moveToBlock(BC.Number); + BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.ChangesValue = BI.FirstDef.isValid(); + + if (!Intf.hasInterference()) + continue; + + // Number of spill code instructions to insert. + unsigned Ins = 0; + + // Interference for the live-in value. + if (BI.LiveIn) { + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) + BC.Entry = SpillPlacement::MustSpill, ++Ins; + else if (Intf.first() < BI.FirstInstr) + BC.Entry = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.first() < BI.LastInstr) + ++Ins; + } + + // Interference for the live-out value. + if (BI.LiveOut) { + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) + BC.Exit = SpillPlacement::MustSpill, ++Ins; + else if (Intf.last() > BI.LastInstr) + BC.Exit = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.last() > BI.FirstInstr) + ++Ins; + } + + // Accumulate the total frequency of inserted spill code. 
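+    // The live-in decision above (the live-out one mirrors it) reduces to a
+    // small position test. A standalone restatement with plain unsigned
+    // positions (entryPref and its parameters are illustrative, not an extra
+    // API):
+    //
+    //   enum Pref { PrefReg, PrefSpill, MustSpill };
+    //   Pref entryPref(unsigned BlockStart, unsigned FirstUse,
+    //                  unsigned LastUse, unsigned IntfFirst, unsigned &Ins) {
+    //     if (IntfFirst <= BlockStart) { ++Ins; return MustSpill; }
+    //     if (IntfFirst < FirstUse)    { ++Ins; return PrefSpill; }
+    //     if (IntfFirst < LastUse)     { ++Ins; return PrefReg;  }
+    //     return PrefReg;              // interference after the last use
+    //   }
+    //
+    // Each counted instruction is then weighted by the block frequency, which
+    // is what the loop below accumulates into StaticCost.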
+ while (Ins--) + StaticCost += SpillPlacer->getBlockFrequency(BC.Number); + } + Cost = StaticCost; + + // Add constraints for use-blocks. Note that these are the only constraints + // that may add a positive bias, it is downhill from here. + SpillPlacer->addConstraints(SplitConstraints); + return SpillPlacer->scanActiveBundles(); +} + + +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. +void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef<unsigned> Blocks) { + const unsigned GroupSize = 8; + SpillPlacement::BlockConstraint BCS[GroupSize]; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; + + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; + Intf.moveToBlock(Number); + + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(makeArrayRef(TBS, T)); + T = 0; + } + continue; + } + + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + + if (++B == GroupSize) { + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); + B = 0; + } + } + + SpillPlacer->addConstraints(makeArrayRef(BCS, B)); + SpillPlacer->addLinks(makeArrayRef(TBS, T)); +} + +void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. + ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) + continue; + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif + } + } + // Any new blocks to add? + if (ActiveBlocks.size() == AddedTo) + break; + + // Compute through constraints from the interference, or assume that all + // through blocks prefer spilling when forming compact regions. + auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); + if (Cand.PhysReg) + addThroughConstraints(Cand.Intf, NewBlocks); + else + // Provide a strong negative bias on through blocks to prevent unwanted + // liveness on loop backedges. + SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); + AddedTo = ActiveBlocks.size(); + + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); + } + DEBUG(dbgs() << ", v=" << Visited); +} + +/// calcCompactRegion - Compute the set of edge bundles that should be live +/// when splitting the current live range into compact regions. Compact +/// regions can be computed without looking at interference. 
They are the +/// regions formed by removing all the live-through blocks from the live range. +/// +/// Returns false if the current live range is already compact, or if the +/// compact regions would form single block regions anyway. +bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { + // Without any through blocks, the live range is already compact. + if (!SA->getNumThroughBlocks()) + return false; + + // Compact regions don't correspond to any physreg. + Cand.reset(IntfCache, 0); + + DEBUG(dbgs() << "Compact region bundles"); + + // Use the spill placer to determine the live bundles. GrowRegion pretends + // that all the through blocks have interference when PhysReg is unset. + SpillPlacer->prepare(Cand.LiveBundles); + + // The static split cost will be zero since Cand.Intf reports no interference. + BlockFrequency Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + growRegion(Cand); + SpillPlacer->finish(); + + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << ", none.\n"); + return false; + } + + DEBUG({ + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + return true; +} + +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +BlockFrequency RAGreedy::calcSpillCost() { + BlockFrequency Cost = 0; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); + + // Unless the value is redefined in the block. + if (BI.LiveIn && BI.LiveOut && BI.FirstDef) + Cost += SpillPlacer->getBlockFrequency(Number); + } + return Cost; +} + +/// calcGlobalSplitCost - Return the global split cost of following the split +/// pattern in LiveBundles. This cost should be added to the local cost of the +/// interference pattern in SplitConstraints. +/// +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + BlockFrequency GlobalCost = 0; + const BitVector &LiveBundles = Cand.LiveBundles; + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + unsigned Ins = 0; + + if (BI.LiveIn) + Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); + if (BI.LiveOut) + Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + while (Ins--) + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); + } + + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + if (!RegIn && !RegOut) + continue; + if (RegIn && RegOut) { + // We need double spill code if this block has interference. 
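+      // Per live-through block, the cost handled here and in the fall-through
+      // below amounts to (a standalone restatement; the helper and its
+      // parameters are illustrative only):
+      //
+      //   double throughBlockCost(bool RegIn, bool RegOut, bool Interference,
+      //                           double Freq) {
+      //     if (!RegIn && !RegOut) return 0;                 // stays on stack
+      //     if (RegIn && RegOut)                             // spill + reload
+      //       return Interference ? 2 * Freq : 0;
+      //     return Freq;                 // register on one side only: one copy
+      //   }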
+ Cand.Intf.moveToBlock(Number); + if (Cand.Intf.hasInterference()) { + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } + continue; + } + // live-in / stack-out or stack-in live-out. + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } + return GlobalCost; +} + +/// splitAroundRegion - Split the current live range around the regions +/// determined by BundleCand and GlobalCand. +/// +/// Before calling this function, GlobalCand and BundleCand must be initialized +/// so each bundle is assigned to a valid candidate, or NoCand for the +/// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor +/// objects must be initialized for the current live range, and intervals +/// created for the used candidates. +/// +/// @param LREdit The LiveRangeEdit object handling the current split. +/// @param UsedCands List of used GlobalCand entries. Every BundleCand value +/// must appear in this list. +void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, + ArrayRef<unsigned> UsedCands) { + // These are the intervals created for new global ranges. We may create more + // intervals for local ranges. + const unsigned NumGlobalIntvs = LREdit.size(); + DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); + assert(NumGlobalIntvs && "No global intervals configured"); + + // Isolate even single instructions when dealing with a proper sub-class. + // That guarantees register class inflation for the stack interval because it + // is all copies. + unsigned Reg = SA->getParent().reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + + // First handle all the blocks with uses. + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + if (BI.LiveIn) { + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } + } + if (BI.LiveOut) { + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); + } + } + + // Create separate intervals for isolated blocks with multiple uses. + if (!IntvIn && !IntvOut) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + continue; + } + + if (IntvIn && IntvOut) + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + else if (IntvIn) + SE->splitRegInBlock(BI, IntvIn, IntfIn); + else + SE->splitRegOutBlock(BI, IntvOut, IntfOut); + } + + // Handle live-through blocks. The relevant live-through blocks are stored in + // the ActiveBlocks list with each candidate. We need to filter out + // duplicates. 
+ BitVector Todo = SA->getThroughBlocks(); + for (unsigned c = 0; c != UsedCands.size(); ++c) { + ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; + for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { + unsigned Number = Blocks[i]; + if (!Todo.test(Number)) + continue; + Todo.reset(Number); + + unsigned IntvIn = 0, IntvOut = 0; + SlotIndex IntfIn, IntfOut; + + unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; + if (CandIn != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandIn]; + IntvIn = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfIn = Cand.Intf.first(); + } + + unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; + if (CandOut != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[CandOut]; + IntvOut = Cand.IntvIdx; + Cand.Intf.moveToBlock(Number); + IntfOut = Cand.Intf.last(); + } + if (!IntvIn && !IntvOut) + continue; + SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); + } + } + + ++NumGlobalSplits; + + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumLiveBlocks(); + + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); + + // Ignore old intervals from DCE. + if (getStage(Reg) != RS_New) + continue; + + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + setStage(Reg, RS_Spill); + continue; + } + + // Global intervals. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] < NumGlobalIntvs) { + if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safe guard against looping. + setStage(Reg, RS_Split2); + } + continue; + } + + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE. + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around region"); +} + +unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs) { + unsigned NumCands = 0; + BlockFrequency BestCost; + + // Check if we can split this live range around a compact region. + bool HasCompact = calcCompactRegion(GlobalCand.front()); + if (HasCompact) { + // Yes, keep GlobalCand[0] as the compact region candidate. + NumCands = 1; + BestCost = BlockFrequency::getMaxFrequency(); + } else { + // No benefit from the compact region, our fallback will be per-block + // splitting. Make sure we find a solution that is cheaper than spilling. + BestCost = calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = "; + MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); + } + + unsigned BestCand = + calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, + false/*IgnoreCSR*/); + + // No solutions found, fall back to single block splitting. 
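+  // The search that just ran can be summarized as "find the cheapest
+  // candidate that beats the baseline", where the baseline is unbounded if a
+  // compact region exists and otherwise the cost of isolating every use
+  // block. A condensed standalone model (names illustrative; needs <vector>
+  // and <limits> if compiled on its own):
+  //
+  //   unsigned pickCandidate(bool HasCompact, double SpillCost,
+  //                          const std::vector<double> &CandCost) {
+  //     double Best = HasCompact ? std::numeric_limits<double>::max()
+  //                              : SpillCost;
+  //     unsigned BestIdx = ~0u;                       // NoCand
+  //     for (unsigned i = 0, e = (unsigned)CandCost.size(); i != e; ++i)
+  //       if (CandCost[i] < Best) { Best = CandCost[i]; BestIdx = i; }
+  //     return BestIdx;
+  //   }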
+ if (!HasCompact && BestCand == NoCand) + return 0; + + return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); +} + +unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) + continue; + + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). + if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned i = 0; i != NumCands; ++i) { + if (i == BestCand || !GlobalCand[i].PhysReg) + continue; + unsigned Count = GlobalCand[i].LiveBundles.count(); + if (Count < WorstCount) + Worst = i, WorstCount = Count; + } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; + } + + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); + + SpillPlacer->prepare(Cand.LiveBundles); + BlockFrequency Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + continue; + } + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); + continue; + } + growRegion(Cand); + + SpillPlacer->finish(); + + // No live bundles, defer to splitSingleBlocks(). + if (!Cand.LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); + continue; + } + + Cost += calcGlobalSplitCost(Cand); + DEBUG({ + dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) + << " with bundles"; + for (int i = Cand.LiveBundles.find_first(); i>=0; + i = Cand.LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = NumCands; + BestCost = Cost; + } + ++NumCands; + } + return BestCand; +} + +unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, + bool HasCompact, + SmallVectorImpl<unsigned> &NewVRegs) { + SmallVector<unsigned, 8> UsedCands; + // Prepare split editor. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + + // Assign all edge bundles to the preferred candidate, or NoCand. + BundleCand.assign(Bundles->getNumBundles(), NoCand); + + // Assign bundles for the best candidate region. + if (BestCand != NoCand) { + GlobalSplitCandidate &Cand = GlobalCand[BestCand]; + if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { + UsedCands.push_back(BestCand); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " + << B << " bundles, intv " << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + // Assign bundles for the compact region. 
+ if (HasCompact) { + GlobalSplitCandidate &Cand = GlobalCand.front(); + assert(!Cand.PhysReg && "Compact region has no physreg"); + if (unsigned B = Cand.getBundles(BundleCand, 0)) { + UsedCands.push_back(0); + Cand.IntvIdx = SE->openIntv(); + DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " + << Cand.IntvIdx << ".\n"); + (void)B; + } + } + + splitAroundRegion(LREdit, UsedCands); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Block Splitting +//===----------------------------------------------------------------------===// + +/// tryBlockSplit - Split a global live range around every block with uses. This +/// creates a lot of local live ranges, that will be split by tryLocalSplit if +/// they don't allocate. +unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs) { + assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); + unsigned Reg = VirtReg.reg; + bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitSpillMode); + ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) + SE->splitSingleBlock(BI); + } + // No blocks were split. + if (LREdit.empty()) + return 0; + + // We did split for some blocks. + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + + // Tell LiveDebugVariables about the new ranges. + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); + + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Sort out the new intervals created by splitting. The remainder interval + // goes straight to spilling, the new local ranges get to stay RS_New. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + LiveInterval &LI = LIS->getInterval(LREdit.get(i)); + if (getStage(LI) == RS_New && IntvMap[i] == 0) + setStage(LI, RS_Spill); + } + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around basic blocks"); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Per-Instruction Splitting +//===----------------------------------------------------------------------===// + +/// Get the number of allocatable registers that match the constraints of \p Reg +/// on \p MI and that are also in \p SuperRC. +static unsigned getNumAllocatableRegsForConstraints( + const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, + const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, + const RegisterClassInfo &RCI) { + assert(SuperRC && "Invalid register class"); + + const TargetRegisterClass *ConstrainedRC = + MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, + /* ExploreBundle */ true); + if (!ConstrainedRC) + return 0; + return RCI.getNumAllocatableRegs(ConstrainedRC); +} + +/// tryInstructionSplit - Split a live range around individual instructions. +/// This is normally not worthwhile since the spiller is doing essentially the +/// same thing. However, when the live range is in a constrained register +/// class, it may help to insert copies such that parts of the live range can +/// be moved to a larger register class. +/// +/// This is similar to spilling to a larger register class. 
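+///
+/// For illustration, the per-use decision made inside the loop below boils
+/// down to the following predicate (a standalone restatement; the helper and
+/// parameter names are invented, not an additional API):
+///
+///   bool worthSplittingAround(unsigned ConstrainedRegs, unsigned SuperRegs,
+///                             bool IsFullCopy) {
+///     // Only split if this use genuinely narrows the register class.
+///     return !IsFullCopy && ConstrainedRegs != SuperRegs;
+///   }
+///
+/// where ConstrainedRegs is the allocatable count of the class this
+/// instruction demands and SuperRegs that of the largest legal super-class.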
+unsigned +RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + // There is no point to this if there are no larger sub-classes. + if (!RegClassInfo.isProperSubClass(CurRC)) + return 0; + + // Always enable split spill mode, since we're effectively spilling to a + // register. + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit, SplitEditor::SM_Size); + + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + if (Uses.size() <= 1) + return 0; + + DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); + + const TargetRegisterClass *SuperRC = + TRI->getLargestLegalSuperClass(CurRC, *MF); + unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); + // Split around every non-copy instruction if this split will relax + // the constraints on the virtual register. + // Otherwise, splitting just inserts uncoalescable copies that do not help + // the allocation. + for (unsigned i = 0; i != Uses.size(); ++i) { + if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) + if (MI->isFullCopy() || + SuperRCNumAllocatableRegs == + getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, + TRI, RCI)) { + DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); + continue; + } + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); + SE->useIntv(SegStart, SegStop); + } + + if (LREdit.empty()) { + DEBUG(dbgs() << "All uses were copies.\n"); + return 0; + } + + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + + // Assign all new registers to RS_Spill. This was the last chance. + setStage(LREdit.begin(), LREdit.end(), RS_Spill); + return 0; +} + + +//===----------------------------------------------------------------------===// +// Local Splitting +//===----------------------------------------------------------------------===// + + +/// calcGapWeights - Compute the maximum spill weight that needs to be evicted +/// in order to use PhysReg between two entries in SA->UseSlots. +/// +/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. +/// +void RAGreedy::calcGapWeights(unsigned PhysReg, + SmallVectorImpl<float> &GapWeight) { + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + const unsigned NumGaps = Uses.size()-1; + + // Start and end points for the interference check. + SlotIndex StartIdx = + BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; + SlotIndex StopIdx = + BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; + + GapWeight.assign(NumGaps, 0.0f); + + // Add interference from each overlapping register. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) + .checkInterference()) + continue; + + // We know that VirtReg is a continuous interval from FirstInstr to + // LastInstr, so we don't need InterferenceQuery. + // + // Interference that overlaps an instruction is counted in both gaps + // surrounding the instruction. The exception is interference before + // StartIdx and after StopIdx. 
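+    // Conceptually, sorted use slots U[0..N-1] define N-1 gaps, and every
+    // interfering segment raises the maximum of each gap it overlaps. A
+    // standalone model with plain integer positions (addSegment and its
+    // parameters are illustrative only):
+    //
+    //   void addSegment(const std::vector<unsigned> &U, unsigned Start,
+    //                   unsigned Stop, float W, std::vector<float> &Gap) {
+    //     for (size_t g = 0, e = U.size() - 1; g != e; ++g)
+    //       if (Start < U[g + 1] && U[g] < Stop)   // segment overlaps gap g
+    //         Gap[g] = std::max(Gap[g], W);
+    //   }
+    //
+    // The loops below walk segments and gaps in lock step instead of
+    // re-scanning, but follow the same idea.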
+ // + LiveIntervalUnion::SegmentIter IntI = + Matrix->getLiveUnions()[*Units] .find(StartIdx); + for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { + // Skip the gaps before IntI. + while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + // Update the gaps covered by IntI. + const float weight = IntI.value()->weight; + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = std::max(GapWeight[Gap], weight); + if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) + break; + } + if (Gap == NumGaps) + break; + } + } + + // Add fixed interference. + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveRange &LR = LIS->getRegUnit(*Units); + LiveRange::const_iterator I = LR.find(StartIdx); + LiveRange::const_iterator E = LR.end(); + + // Same loop as above. Mark any overlapped gaps as HUGE_VALF. + for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { + while (Uses[Gap+1].getBoundaryIndex() < I->start) + if (++Gap == NumGaps) + break; + if (Gap == NumGaps) + break; + + for (; Gap != NumGaps; ++Gap) { + GapWeight[Gap] = llvm::huge_valf; + if (Uses[Gap+1].getBaseIndex() >= I->end) + break; + } + if (Gap == NumGaps) + break; + } + } +} + +/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only +/// basic block. +/// +unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs) { + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); + + // Note that it is possible to have an interval that is live-in or live-out + // while only covering a single block - A phi-def can use undef values from + // predecessors, and the block could be a single-block loop. + // We don't bother doing anything clever about such a case, we simply assume + // that the interval is continuous from FirstInstr to LastInstr. We should + // make sure that we don't do anything illegal to such an interval, though. + + ArrayRef<SlotIndex> Uses = SA->getUseSlots(); + if (Uses.size() <= 2) + return 0; + const unsigned NumGaps = Uses.size()-1; + + DEBUG({ + dbgs() << "tryLocalSplit: "; + for (unsigned i = 0, e = Uses.size(); i != e; ++i) + dbgs() << ' ' << Uses[i]; + dbgs() << '\n'; + }); + + // If VirtReg is live across any register mask operands, compute a list of + // gaps with register masks. + SmallVector<unsigned, 8> RegMaskGaps; + if (Matrix->checkRegMaskInterference(VirtReg)) { + // Get regmask slots for the whole block. + ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); + DEBUG(dbgs() << RMS.size() << " regmasks in block:"); + // Constrain to VirtReg's live range. + unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), + Uses.front().getRegSlot()) - RMS.begin(); + unsigned re = RMS.size(); + for (unsigned i = 0; i != NumGaps && ri != re; ++i) { + // Look for Uses[i] <= RMS <= Uses[i+1]. + assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); + if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) + continue; + // Skip a regmask on the same instruction as the last use. It doesn't + // overlap the live range. + if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) + break; + DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); + RegMaskGaps.push_back(i); + // Advance ri to the next gap. A regmask on one of the uses counts in + // both gaps. 
+ while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) + ++ri; + } + DEBUG(dbgs() << '\n'); + } + + // Since we allow local split results to be split again, there is a risk of + // creating infinite loops. It is tempting to require that the new live + // ranges have less instructions than the original. That would guarantee + // convergence, but it is too strict. A live range with 3 instructions can be + // split 2+3 (including the COPY), and we want to allow that. + // + // Instead we use these rules: + // + // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the + // noop split, of course). + // 2. Require progress be made for ranges with getStage() == RS_Split2. All + // the new ranges must have fewer instructions than before the split. + // 3. New ranges with the same number of instructions are marked RS_Split2, + // smaller ranges are marked RS_New. + // + // These rules allow a 3 -> 2+3 split once, which we need. They also prevent + // excessive splitting and infinite loops. + // + bool ProgressRequired = getStage(VirtReg) >= RS_Split2; + + // Best split candidate. + unsigned BestBefore = NumGaps; + unsigned BestAfter = 0; + float BestDiff = 0; + + const float blockFreq = + SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * + (1.0f / MBFI->getEntryFreq()); + SmallVector<float, 8> GapWeight; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + // Keep track of the largest spill weight that would need to be evicted in + // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. + calcGapWeights(PhysReg, GapWeight); + + // Remove any gaps with regmask clobbers. + if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) + for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) + GapWeight[RegMaskGaps[i]] = llvm::huge_valf; + + // Try to find the best sequence of gaps to close. + // The new spill weight must be larger than any gap interference. + + // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. + unsigned SplitBefore = 0, SplitAfter = 1; + + // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). + // It is the spill weight that needs to be evicted. + float MaxGap = GapWeight[0]; + + for (;;) { + // Live before/after split? + const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; + const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' + << Uses[SplitBefore] << '-' << Uses[SplitAfter] + << " i=" << MaxGap); + + // Stop before the interval gets so big we wouldn't be making progress. + if (!LiveBefore && !LiveAfter) { + DEBUG(dbgs() << " all\n"); + break; + } + // Should the interval be extended or shrunk? + bool Shrink = true; + + // How many gaps would the new range have? + unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; + + // Legally, without causing looping? + bool Legal = !ProgressRequired || NewGaps < NumGaps; + + if (Legal && MaxGap < llvm::huge_valf) { + // Estimate the new spill weight. Each instruction reads or writes the + // register. Conservatively assume there are no read-modify-write + // instructions. + // + // Try to guess the size of the new interval. + const float EstWeight = normalizeSpillWeight( + blockFreq * (NewGaps + 1), + Uses[SplitBefore].distance(Uses[SplitAfter]) + + (LiveBefore + LiveAfter) * SlotIndex::InstrDist, + 1); + // Would this split be possible to allocate? + // Never allocate all gaps, we wouldn't be making progress. 
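+        // The estimate above is roughly "use frequency of the new range
+        // divided by its size": each of the NewGaps+1 instructions contributes
+        // one use at blockFreq, and the size is the distance between the split
+        // points plus slack for the live-in/live-out copies. A simplified
+        // standalone sketch keeping only the terms visible here (the real
+        // normalizeSpillWeight may scale further):
+        //
+        //   float estWeight(float BlockFreq, unsigned NewGaps, unsigned Dist,
+        //                   bool LiveBefore, bool LiveAfter,
+        //                   unsigned InstrDist) {
+        //     float UseFreq = BlockFreq * (NewGaps + 1);
+        //     unsigned Size = Dist + (LiveBefore + LiveAfter) * InstrDist;
+        //     return UseFreq / Size;
+        //   }
+        //
+        // The Hysteresis comparison below then accepts the split only if this
+        // estimated weight (with a margin) beats the heaviest interference in
+        // the covered gaps, i.e. the new range could evict it.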
+ DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { + Shrink = false; + float Diff = EstWeight - MaxGap; + if (Diff > BestDiff) { + DEBUG(dbgs() << " (best)"); + BestDiff = Hysteresis * Diff; + BestBefore = SplitBefore; + BestAfter = SplitAfter; + } + } + } + + // Try to shrink. + if (Shrink) { + if (++SplitBefore < SplitAfter) { + DEBUG(dbgs() << " shrink\n"); + // Recompute the max when necessary. + if (GapWeight[SplitBefore - 1] >= MaxGap) { + MaxGap = GapWeight[SplitBefore]; + for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) + MaxGap = std::max(MaxGap, GapWeight[i]); + } + continue; + } + MaxGap = 0; + } + + // Try to extend the interval. + if (SplitAfter >= NumGaps) { + DEBUG(dbgs() << " end\n"); + break; + } + + DEBUG(dbgs() << " extend\n"); + MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); + } + } + + // Didn't find any candidates? + if (BestBefore == NumGaps) + return 0; + + DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] + << '-' << Uses[BestAfter] << ", " << BestDiff + << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); + + LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + SE->reset(LREdit); + + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SmallVector<unsigned, 8> IntvMap; + SE->finish(&IntvMap); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); + + // If the new range has the same number of instructions as before, mark it as + // RS_Split2 so the next split will be forced to make progress. Otherwise, + // leave the new intervals as RS_New so they can compete. + bool LiveBefore = BestBefore != 0 || BI.LiveIn; + bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; + unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; + if (NewGaps >= NumGaps) { + DEBUG(dbgs() << "Tagging non-progress ranges: "); + assert(!ProgressRequired && "Didn't make progress when it was required."); + for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) + if (IntvMap[i] == 1) { + setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i))); + } + DEBUG(dbgs() << '\n'); + } + ++NumLocalSplits; + + return 0; +} + +//===----------------------------------------------------------------------===// +// Live Range Splitting +//===----------------------------------------------------------------------===// + +/// trySplit - Try to split VirtReg or one of its interferences, making it +/// assignable. +/// @return Physreg when VirtReg may be assigned and/or new NewVRegs. +unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl<unsigned>&NewVRegs) { + // Ranges must be Split2 or less. + if (getStage(VirtReg) >= RS_Spill) + return 0; + + // Local intervals are handled separately. + if (LIS->intervalIsInOneMBB(VirtReg)) { + NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); + unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + return tryInstructionSplit(VirtReg, Order, NewVRegs); + } + + NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + + SA->analyze(&VirtReg); + + // FIXME: SplitAnalysis may repair broken live ranges coming from the + // coalescer. That may cause the range to become allocatable which means that + // tryRegionSplit won't be making progress. 
This check should be replaced with + // an assertion when the coalescer is fixed. + if (SA->didRepairRange()) { + // VirtReg has changed, so all cached queries are invalid. + Matrix->invalidateVirtRegs(); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) + return PhysReg; + } + + // First try to split around a region spanning multiple blocks. RS_Split2 + // ranges already made dubious progress with region splitting, so they go + // straight to single block splitting. + if (getStage(VirtReg) < RS_Split2) { + unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + } + + // Then isolate blocks. + return tryBlockSplit(VirtReg, Order, NewVRegs); +} + +//===----------------------------------------------------------------------===// +// Last Chance Recoloring +//===----------------------------------------------------------------------===// + +/// mayRecolorAllInterferences - Check if the virtual registers that +/// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be +/// recolored to free \p PhysReg. +/// When true is returned, \p RecoloringCandidates has been augmented with all +/// the live intervals that need to be recolored in order to free \p PhysReg +/// for \p VirtReg. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +bool +RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, + SmallLISet &RecoloringCandidates, + const SmallVirtRegSet &FixedRegisters) { + const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); + + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); + // If there is LastChanceRecoloringMaxInterference or more interferences, + // chances are one would not be recolorable. + if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= + LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { + DEBUG(dbgs() << "Early abort: too many interferences.\n"); + CutOffInfo |= CO_Interf; + return false; + } + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; + // If Intf is done and sit on the same register class as VirtReg, + // it would not be recolorable as it is in the same state as VirtReg. + if ((getStage(*Intf) == RS_Done && + MRI->getRegClass(Intf->reg) == CurRC) || + FixedRegisters.count(Intf->reg)) { + DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); + return false; + } + RecoloringCandidates.insert(Intf); + } + } + return true; +} + +/// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring +/// its interferences. +/// Last chance recoloring chooses a color for \p VirtReg and recolors every +/// virtual register that was using it. The recoloring process may recursively +/// use the last chance recoloring. Therefore, when a virtual register has been +/// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot +/// be last-chance-recolored again during this recoloring "session". +/// E.g., +/// Let +/// vA can use {R1, R2 } +/// vB can use { R2, R3} +/// vC can use {R1 } +/// Where vA, vB, and vC cannot be split anymore (they are reloads for +/// instance) and they all interfere. +/// +/// vA is assigned R1 +/// vB is assigned R2 +/// vC tries to evict vA but vA is already done. +/// Regular register allocation fails. 
+/// +/// Last chance recoloring kicks in: +/// vC does as if vA was evicted => vC uses R1. +/// vC is marked as fixed. +/// vA needs to find a color. +/// None are available. +/// vA cannot evict vC: vC is a fixed virtual register now. +/// vA does as if vB was evicted => vA uses R2. +/// vB needs to find a color. +/// R3 is available. +/// Recoloring => vC = R1, vA = R2, vB = R3 +/// +/// \p Order defines the preferred allocation order for \p VirtReg. +/// \p NewRegs will contain any new virtual register that have been created +/// (split, spill) during the process and that must be assigned. +/// \p FixedRegisters contains all the virtual registers that cannot be +/// recolored. +/// \p Depth gives the current depth of the last chance recoloring. +/// \return a physical register that can be used for VirtReg or ~0u if none +/// exists. +unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); + // Ranges must be Done. + assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && + "Last chance recoloring should really be last chance"); + // Set the max depth to LastChanceRecoloringMaxDepth. + // We may want to reconsider that if we end up with a too large search space + // for target with hundreds of registers. + // Indeed, in that case we may want to cut the search space earlier. + if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { + DEBUG(dbgs() << "Abort because max depth has been reached.\n"); + CutOffInfo |= CO_Depth; + return ~0u; + } + + // Set of Live intervals that will need to be recolored. + SmallLISet RecoloringCandidates; + // Record the original mapping virtual register to physical register in case + // the recoloring fails. + DenseMap<unsigned, unsigned> VirtRegToPhysReg; + // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in + // this recoloring "session". + FixedRegisters.insert(VirtReg.reg); + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + RecoloringCandidates.clear(); + VirtRegToPhysReg.clear(); + + // It is only possible to recolor virtual register interference. + if (Matrix->checkInterference(VirtReg, PhysReg) > + LiveRegMatrix::IK_VirtReg) { + DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); + + continue; + } + + // Early give up on this PhysReg if it is obvious we cannot recolor all + // the interferences. + if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, + FixedRegisters)) { + DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); + continue; + } + + // RecoloringCandidates contains all the virtual registers that interfer + // with VirtReg on PhysReg (or one of its aliases). + // Enqueue them for recoloring and perform the actual recoloring. + PQueue RecoloringQueue; + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + enqueue(RecoloringQueue, *It); + assert(VRM->hasPhys(ItVirtReg) && + "Interferences are supposed to be with allocated vairables"); + + // Record the current allocation. + VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); + // unset the related struct. 
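+      // The overall shape of this attempt is a small transaction: snapshot
+      // the current colors, tentatively take them away, recurse, and roll
+      // everything back on failure. A standalone outline (the map and callback
+      // stand in for the matrix/VRM state, they are not the real interfaces):
+      //
+      //   bool tryWithRollback(std::map<unsigned, unsigned> &Assignment,
+      //                        const std::vector<unsigned> &Victims,
+      //                        const std::function<bool()> &Recurse) {
+      //     std::map<unsigned, unsigned> Saved = Assignment;  // snapshot
+      //     for (unsigned V : Victims) Assignment.erase(V);   // evict victims
+      //     if (Recurse()) return true;                       // colors found
+      //     Assignment = Saved;                               // roll back
+      //     return false;
+      //   }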
+ Matrix->unassign(**It); + } + + // Do as if VirtReg was assigned to PhysReg so that the underlying + // recoloring has the right information about the interferes and + // available colors. + Matrix->assign(VirtReg, PhysReg); + + // Save the current recoloring state. + // If we cannot recolor all the interferences, we will have to start again + // at this point for the next physical register. + SmallVirtRegSet SaveFixedRegisters(FixedRegisters); + if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, + Depth)) { + // Do not mess up with the global assignment process. + // I.e., VirtReg must be unassigned. + Matrix->unassign(VirtReg); + return PhysReg; + } + + DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " + << PrintReg(PhysReg, TRI) << '\n'); + + // The recoloring attempt failed, undo the changes. + FixedRegisters = SaveFixedRegisters; + Matrix->unassign(VirtReg); + + for (SmallLISet::iterator It = RecoloringCandidates.begin(), + EndIt = RecoloringCandidates.end(); + It != EndIt; ++It) { + unsigned ItVirtReg = (*It)->reg; + if (VRM->hasPhys(ItVirtReg)) + Matrix->unassign(**It); + unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; + Matrix->assign(**It, ItPhysReg); + } + } + + // Last chance recoloring did not worked either, give up. + return ~0u; +} + +/// tryRecoloringCandidates - Try to assign a new color to every register +/// in \RecoloringQueue. +/// \p NewRegs will contain any new virtual register created during the +/// recoloring process. +/// \p FixedRegisters[in/out] contains all the registers that have been +/// recolored. +/// \return true if all virtual registers in RecoloringQueue were successfully +/// recolored, false otherwise. +bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + while (!RecoloringQueue.empty()) { + LiveInterval *LI = dequeue(RecoloringQueue); + DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); + unsigned PhysReg; + PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); + if (PhysReg == ~0u || !PhysReg) + return false; + DEBUG(dbgs() << "Recoloring of " << *LI + << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); + Matrix->assign(*LI, PhysReg); + FixedRegisters.insert(LI->reg); + } + return true; +} + +//===----------------------------------------------------------------------===// +// Main Entry Point +//===----------------------------------------------------------------------===// + +unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &NewVRegs) { + CutOffInfo = CO_None; + LLVMContext &Ctx = MF->getFunction()->getContext(); + SmallVirtRegSet FixedRegisters; + unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); + if (Reg == ~0U && (CutOffInfo != CO_None)) { + uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); + if (CutOffEncountered == CO_Depth) + Ctx.emitError("register allocation failed: maximum depth for recoloring " + "reached. Use -fexhaustive-register-search to skip " + "cutoffs"); + else if (CutOffEncountered == CO_Interf) + Ctx.emitError("register allocation failed: maximum interference for " + "recoloring reached. Use -fexhaustive-register-search " + "to skip cutoffs"); + else if (CutOffEncountered == (CO_Depth | CO_Interf)) + Ctx.emitError("register allocation failed: maximum interference and " + "depth for recoloring reached. 
Use " + "-fexhaustive-register-search to skip cutoffs"); + } + return Reg; +} + +/// Using a CSR for the first time has a cost because it causes push|pop +/// to be added to prologue|epilogue. Splitting a cold section of the live +/// range can have lower cost than using the CSR for the first time; +/// Spilling a live range in the cold path can have lower cost than using +/// the CSR for the first time. Returns the physical register if we decide +/// to use the CSR; otherwise return 0. +unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, + AllocationOrder &Order, + unsigned PhysReg, + unsigned &CostPerUseLimit, + SmallVectorImpl<unsigned> &NewVRegs) { + if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { + // We choose spill over using the CSR for the first time if the spill cost + // is lower than CSRCost. + SA->analyze(&VirtReg); + if (calcSpillCost() >= CSRCost) + return PhysReg; + + // We are going to spill, set CostPerUseLimit to 1 to make sure that + // we will not use a callee-saved register in tryEvict. + CostPerUseLimit = 1; + return 0; + } + if (getStage(VirtReg) < RS_Split) { + // We choose pre-splitting over using the CSR for the first time if + // the cost of splitting is lower than CSRCost. + SA->analyze(&VirtReg); + unsigned NumCands = 0; + BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. + unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, + NumCands, true /*IgnoreCSR*/); + if (BestCand == NoCand) + // Use the CSR if we can't find a region split below CSRCost. + return PhysReg; + + // Perform the actual pre-splitting. + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return 0; + } + return PhysReg; +} + +void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { + // Do not keep invalid information around. + SetOfBrokenHints.remove(&LI); +} + +void RAGreedy::initializeCSRCost() { + // We use the larger one out of the command-line option and the value report + // by TRI. + CSRCost = BlockFrequency( + std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); + if (!CSRCost.getFrequency()) + return; + + // Raw cost is relative to Entry == 2^14; scale it appropriately. + uint64_t ActualEntry = MBFI->getEntryFreq(); + if (!ActualEntry) { + CSRCost = 0; + return; + } + uint64_t FixedEntry = 1 << 14; + if (ActualEntry < FixedEntry) + CSRCost *= BranchProbability(ActualEntry, FixedEntry); + else if (ActualEntry <= UINT32_MAX) + // Invert the fraction and divide. + CSRCost /= BranchProbability(FixedEntry, ActualEntry); + else + // Can't use BranchProbability in general, since it takes 32-bit numbers. + CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); +} + +/// \brief Collect the hint info for \p Reg. +/// The results are stored into \p Out. +/// \p Out is not cleared before being populated. +void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!Instr.isFullCopy()) + continue; + // Look for the other end of the copy. + unsigned OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) + continue; + } + // Get the current assignment. + unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) + ? OtherReg + : VRM->getPhys(OtherReg); + // Push the collected information. 
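+    // Each entry records "there is a copy at this frequency whose other end
+    // currently lives in OtherPhysReg". getBrokenHintFreq() below then just
+    // sums the frequencies of the entries whose register differs from the
+    // color being evaluated. A standalone model of that cost (types
+    // simplified, names illustrative):
+    //
+    //   struct Hint { double Freq; unsigned OtherPhysReg; };
+    //   double brokenHintFreq(const std::vector<Hint> &Hints,
+    //                         unsigned PhysReg) {
+    //     double Cost = 0;
+    //     for (const Hint &H : Hints)
+    //       if (H.OtherPhysReg != PhysReg)
+    //         Cost += H.Freq;
+    //     return Cost;
+    //   }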
+ Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, + OtherPhysReg)); + } +} + +/// \brief Using the given \p List, compute the cost of the broken hints if +/// \p PhysReg was used. +/// \return The cost of \p List for \p PhysReg. +BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, + unsigned PhysReg) { + BlockFrequency Cost = 0; + for (const HintInfo &Info : List) { + if (Info.PhysReg != PhysReg) + Cost += Info.Freq; + } + return Cost; +} + +/// \brief Using the register assigned to \p VirtReg, try to recolor +/// all the live ranges that are copy-related with \p VirtReg. +/// The recoloring is then propagated to all the live-ranges that have +/// been recolored and so on, until no more copies can be coalesced or +/// it is not profitable. +/// For a given live range, profitability is determined by the sum of the +/// frequencies of the non-identity copies it would introduce with the old +/// and new register. +void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { + // We have a broken hint, check if it is possible to fix it by + // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted + // some register and PhysReg may be available for the other live-ranges. + SmallSet<unsigned, 4> Visited; + SmallVector<unsigned, 2> RecoloringCandidates; + HintsInfo Info; + unsigned Reg = VirtReg.reg; + unsigned PhysReg = VRM->getPhys(Reg); + // Start the recoloring algorithm from the input live-interval, then + // it will propagate to the ones that are copy-related with it. + Visited.insert(Reg); + RecoloringCandidates.push_back(Reg); + + DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' + << PrintReg(PhysReg, TRI) << ")\n"); + + do { + Reg = RecoloringCandidates.pop_back_val(); + + // We cannot recolor physcal register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + continue; + + assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); + + // Get the live interval mapped with this virtual register to be able + // to check for the interference with the new color. + LiveInterval &LI = LIS->getInterval(Reg); + unsigned CurrPhys = VRM->getPhys(Reg); + // Check that the new color matches the register class constraints and + // that it is free for this live range. + if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || + Matrix->checkInterference(LI, PhysReg))) + continue; + + DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) + << ") is recolorable.\n"); + + // Gather the hint info. + Info.clear(); + collectHintInfo(Reg, Info); + // Check if recoloring the live-range will increase the cost of the + // non-identity copies. + if (CurrPhys != PhysReg) { + DEBUG(dbgs() << "Checking profitability:\n"); + BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); + BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); + DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() + << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); + if (OldCopiesCost < NewCopiesCost) { + DEBUG(dbgs() << "=> Not profitable.\n"); + continue; + } + // At this point, the cost is either cheaper or equal. If it is + // equal, we consider this is profitable because it may expose + // more recoloring opportunities. + DEBUG(dbgs() << "=> Profitable.\n"); + // Recolor the live-range. + Matrix->unassign(LI); + Matrix->assign(LI, PhysReg); + } + // Push all copy-related live-ranges to keep reconciling the broken + // hints. 
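+    // The surrounding do/while is a plain worklist walk over the copy graph:
+    // seed with the live range whose hint is broken, recolor any candidate
+    // whose copies get cheaper (or stay equal), and only then push its own
+    // copy partners. A standalone outline of that traversal (CopyGraph and
+    // TryRecolor are stand-ins, not the real data structures):
+    //
+    //   void propagate(unsigned Seed,
+    //                  const std::multimap<unsigned, unsigned> &CopyGraph,
+    //                  const std::function<bool(unsigned)> &TryRecolor) {
+    //     std::set<unsigned> Visited{Seed};
+    //     std::vector<unsigned> Work{Seed};
+    //     while (!Work.empty()) {
+    //       unsigned R = Work.back(); Work.pop_back();
+    //       if (!TryRecolor(R)) continue;     // not free or not profitable
+    //       auto Range = CopyGraph.equal_range(R);
+    //       for (auto I = Range.first; I != Range.second; ++I)
+    //         if (Visited.insert(I->second).second)
+    //           Work.push_back(I->second);
+    //     }
+    //   }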
+ for (const HintInfo &HI : Info) { + if (Visited.insert(HI.Reg).second) + RecoloringCandidates.push_back(HI.Reg); + } + } while (!RecoloringCandidates.empty()); +} + +/// \brief Try to recolor broken hints. +/// Broken hints may be repaired by recoloring when an evicted variable +/// freed up a register for a larger live-range. +/// Consider the following example: +/// BB1: +/// a = +/// b = +/// BB2: +/// ... +/// = b +/// = a +/// Let us assume b gets split: +/// BB1: +/// a = +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// = a +/// Because of how the allocation work, b, c, and d may be assigned different +/// colors. Now, if a gets evicted later: +/// BB1: +/// a = +/// st a, SpillSlot +/// b = +/// BB2: +/// c = b +/// ... +/// d = c +/// = d +/// e = ld SpillSlot +/// = e +/// This is likely that we can assign the same register for b, c, and d, +/// getting rid of 2 copies. +void RAGreedy::tryHintsRecoloring() { + for (LiveInterval *LI : SetOfBrokenHints) { + assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && + "Recoloring is possible only for virtual registers"); + // Some dead defs may be around (e.g., because of debug uses). + // Ignore those. + if (!VRM->hasPhys(LI->reg)) + continue; + tryHintRecoloring(*LI); + } +} + +unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, + SmallVectorImpl<unsigned> &NewVRegs, + SmallVirtRegSet &FixedRegisters, + unsigned Depth) { + unsigned CostPerUseLimit = ~0u; + // First try assigning a free register. + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { + // When NewVRegs is not empty, we may have made decisions such as evicting + // a virtual register, go with the earlier decisions and use the physical + // register. + if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && + NewVRegs.empty()) { + unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, + CostPerUseLimit, NewVRegs); + if (CSRReg || !NewVRegs.empty()) + // Return now if we decide to use a CSR or create new vregs due to + // pre-splitting. + return CSRReg; + } else + return PhysReg; + } + + LiveRangeStage Stage = getStage(VirtReg); + DEBUG(dbgs() << StageName[Stage] + << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); + + // Try to evict a less worthy live range, but only for ranges from the primary + // queue. The RS_Split ranges already failed to do this, and they should not + // get a second chance until they have been split. + if (Stage != RS_Split) + if (unsigned PhysReg = + tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { + unsigned Hint = MRI->getSimpleHint(VirtReg.reg); + // If VirtReg has a hint and that hint is broken record this + // virtual register as a recoloring candidate for broken hint. + // Indeed, since we evicted a variable in its neighborhood it is + // likely we can at least partially recolor some of the + // copy-related live-ranges. + if (Hint && Hint != PhysReg) + SetOfBrokenHints.insert(&VirtReg); + return PhysReg; + } + + assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + + // The first time we see a live range, don't try to split or spill. + // Wait until the second time, when all smaller ranges have been allocated. + // This gives a better picture of the interference to split around. 
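// A simplified sketch of the order of decisions in selectOrSplitImpl(). The
// real stage enum (RS_New, RS_Assign, RS_Split, RS_Spill, RS_Memory, RS_Done)
// is declared earlier in RegAllocGreedy.cpp; this model keeps just enough of
// it to show how a live range is pushed through "assign, evict, wait one
// round, split, then spill". All names here are illustrative only.
#include <iostream>
#include <string>

enum Stage { FirstRound, SecondRound, Finished };

static std::string decide(Stage S, bool HasFreeReg, bool CanEvict,
                          bool Spillable) {
  if (HasFreeReg)
    return "assign the free register";
  if (CanEvict)
    return "evict a less worthy live range";
  if (S == FirstRound)
    return "requeue and wait for the second round"; // no split/spill yet
  if (S == Finished || !Spillable)
    return "try last-chance recoloring";
  return "try splitting, then spill";
}

int main() {
  std::cout << decide(FirstRound, false, false, true) << '\n';
  std::cout << decide(SecondRound, false, false, true) << '\n';
}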
+ if (Stage < RS_Split) { + setStage(VirtReg, RS_Split); + DEBUG(dbgs() << "wait for second round\n"); + NewVRegs.push_back(VirtReg.reg); + return 0; + } + + // If we couldn't allocate a register from spilling, there is probably some + // invalid inline assembly. The base class wil report it. + if (Stage >= RS_Done || !VirtReg.isSpillable()) + return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, + Depth); + + // Try splitting VirtReg or interferences. + unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); + if (PhysReg || !NewVRegs.empty()) + return PhysReg; + + // Finally spill VirtReg itself. + if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { + // TODO: This is experimental and in particular, we do not model + // the live range splitting done by spilling correctly. + // We would need a deep integration with the spiller to do the + // right thing here. Anyway, that is still good for early testing. + setStage(VirtReg, RS_Memory); + DEBUG(dbgs() << "Do as if this register is in memory\n"); + NewVRegs.push_back(VirtReg.reg); + } else { + NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); + LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); + + if (VerifyEnabled) + MF->verify(this, "After spilling"); + } + + // The live virtual register requesting allocation was spilled, so tell + // the caller not to allocate anything during this round. + return 0; +} + +bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { + DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" + << "********** Function: " << mf.getName() << '\n'); + + MF = &mf; + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + RCI.runOnMachineFunction(mf); + + EnableLocalReassign = EnableLocalReassignment || + MF->getSubtarget().enableRALocalReassignment( + MF->getTarget().getOptLevel()); + + if (VerifyEnabled) + MF->verify(this, "Before greedy register allocator"); + + RegAllocBase::init(getAnalysis<VirtRegMap>(), + getAnalysis<LiveIntervals>(), + getAnalysis<LiveRegMatrix>()); + Indexes = &getAnalysis<SlotIndexes>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + DomTree = &getAnalysis<MachineDominatorTree>(); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); + Loops = &getAnalysis<MachineLoopInfo>(); + Bundles = &getAnalysis<EdgeBundles>(); + SpillPlacer = &getAnalysis<SpillPlacement>(); + DebugVars = &getAnalysis<LiveDebugVariables>(); + + initializeCSRCost(); + + calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); + + DEBUG(LIS->dump()); + + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); + ExtraRegInfo.clear(); + ExtraRegInfo.resize(MRI->getNumVirtRegs()); + NextCascade = 1; + IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); + GlobalCand.resize(32); // This will grow as needed. + SetOfBrokenHints.clear(); + + allocatePhysRegs(); + tryHintsRecoloring(); + releaseMemory(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp new file mode 100644 index 0000000..fd28b05 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -0,0 +1,874 @@ +//===------ RegAllocPBQP.cpp ---- PBQP Register Allocator -------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a Partitioned Boolean Quadratic Programming (PBQP) based +// register allocator for LLVM. This allocator works by constructing a PBQP +// problem representing the register allocation problem under consideration, +// solving this using a PBQP solver, and mapping the solution back to a +// register assignment. If any variables are selected for spilling then spill +// code is inserted and the process repeated. +// +// The PBQP solver (pbqp.c) provided for this allocator uses a heuristic tuned +// for register allocation. For more information on PBQP for register +// allocation, see the following papers: +// +// (1) Hames, L. and Scholz, B. 2006. Nearly optimal register allocation with +// PBQP. In Proceedings of the 7th Joint Modular Languages Conference +// (JMLC'06). LNCS, vol. 4228. Springer, New York, NY, USA. 346-361. +// +// (2) Scholz, B., Eckstein, E. 2002. Register allocation for irregular +// architectures. In Proceedings of the Joint Conference on Languages, +// Compilers and Tools for Embedded Systems (LCTES'02), ACM Press, New York, +// NY, USA, 139-148. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegAllocPBQP.h" +#include "RegisterCoalescer.h" +#include "Spiller.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <limits> +#include <memory> +#include <queue> +#include <set> +#include <sstream> +#include <vector> + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +static RegisterRegAlloc +RegisterPBQPRepAlloc("pbqp", "PBQP register allocator", + createDefaultPBQPRegisterAllocator); + +static cl::opt<bool> +PBQPCoalescing("pbqp-coalescing", + cl::desc("Attempt coalescing during PBQP register allocation."), + cl::init(false), cl::Hidden); + +#ifndef NDEBUG +static cl::opt<bool> +PBQPDumpGraphs("pbqp-dump-graphs", + cl::desc("Dump graphs for each function/round in the compilation unit."), + cl::init(false), cl::Hidden); +#endif + +namespace { + +/// +/// PBQP based allocators solve the register allocation problem by mapping +/// register allocation problems to Partitioned Boolean Quadratic +/// Programming problems. +class RegAllocPBQP : public MachineFunctionPass { +public: + + static char ID; + + /// Construct a PBQP register allocator. + RegAllocPBQP(char *cPassID = nullptr) + : MachineFunctionPass(ID), customPassID(cPassID) { + initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); + initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); + initializeLiveStacksPass(*PassRegistry::getPassRegistry()); + initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); + } + + /// Return the pass name. 
+  const char* getPassName() const override {
+    return "PBQP Register Allocator";
+  }
+
+  /// PBQP analysis usage.
+  void getAnalysisUsage(AnalysisUsage &au) const override;
+
+  /// Perform register allocation.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+
+  typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+  typedef std::vector<const LiveInterval*> Node2LIMap;
+  typedef std::vector<unsigned> AllowedSet;
+  typedef std::vector<AllowedSet> AllowedSetMap;
+  typedef std::pair<unsigned, unsigned> RegPair;
+  typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+  typedef std::set<unsigned> RegSet;
+
+  char *customPassID;
+
+  RegSet VRegsToAlloc, EmptyIntervalVRegs;
+
+  /// \brief Finds the initial set of vreg intervals to allocate.
+  void findVRegIntervalsToAlloc(const MachineFunction &MF, LiveIntervals &LIS);
+
+  /// \brief Constructs an initial graph.
+  void initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, Spiller &VRegSpiller);
+
+  /// \brief Spill the given VReg.
+  void spillVReg(unsigned VReg, SmallVectorImpl<unsigned> &NewIntervals,
+                 MachineFunction &MF, LiveIntervals &LIS, VirtRegMap &VRM,
+                 Spiller &VRegSpiller);
+
+  /// \brief Given a solved PBQP problem, maps this solution back to a register
+  /// assignment.
+  bool mapPBQPToRegAlloc(const PBQPRAGraph &G,
+                         const PBQP::Solution &Solution,
+                         VirtRegMap &VRM,
+                         Spiller &VRegSpiller);
+
+  /// \brief Postprocessing before final spilling. Sets basic block "live in"
+  /// variables.
+  void finalizeAlloc(MachineFunction &MF, LiveIntervals &LIS,
+                     VirtRegMap &VRM) const;
+
+};
+
+char RegAllocPBQP::ID = 0;
+
+/// @brief Set spill costs for each node in the PBQP reg-alloc graph.
+class SpillCosts : public PBQPRAConstraint {
+public:
+  void apply(PBQPRAGraph &G) override {
+    LiveIntervals &LIS = G.getMetadata().LIS;
+
+    // A minimum spill cost, so that register constraints can be set
+    // without normalization in the [0.0:MinSpillCost) interval.
+    const PBQP::PBQPNum MinSpillCost = 10.0;
+
+    for (auto NId : G.nodeIds()) {
+      PBQP::PBQPNum SpillCost =
+        LIS.getInterval(G.getNodeMetadata(NId).getVReg()).weight;
+      if (SpillCost == 0.0)
+        SpillCost = std::numeric_limits<PBQP::PBQPNum>::min();
+      else
+        SpillCost += MinSpillCost;
+      PBQPRAGraph::RawVector NodeCosts(G.getNodeCosts(NId));
+      NodeCosts[PBQP::RegAlloc::getSpillOptionIdx()] = SpillCost;
+      G.setNodeCosts(NId, std::move(NodeCosts));
+    }
+  }
+};
+
+/// @brief Add interference edges between overlapping vregs.
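// A standalone sketch of the spill-cost normalization done by
// SpillCosts::apply() above: a zero interval weight is mapped to the smallest
// positive value (spilling stays an option but is nearly free), and every
// other weight is shifted up by MinSpillCost so the range [0.0, MinSpillCost)
// remains free for other constraints. PBQP::PBQPNum is a floating-point type;
// double is used here only for readability, and the weights are invented.
#include <iostream>
#include <limits>
#include <vector>

int main() {
  const double MinSpillCost = 10.0;
  for (double Weight : std::vector<double>{0.0, 0.5, 3.0}) {
    double SpillCost = (Weight == 0.0)
                           ? std::numeric_limits<double>::min()
                           : Weight + MinSpillCost;
    std::cout << Weight << " -> " << SpillCost << '\n';
  }
  // Prints: 0 -> 2.22507e-308, 0.5 -> 10.5, 3 -> 13
}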
+class Interference : public PBQPRAConstraint { +private: + + typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr; + typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey; + typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache; + typedef DenseSet<IKey> DisjointAllowedRegsCache; + typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey; + typedef DenseSet<IEdgeKey> IEdgeCache; + + bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + const DisjointAllowedRegsCache &D) const { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + if (NRegs == MRegs) + return false; + + if (NRegs < MRegs) + return D.count(IKey(NRegs, MRegs)) > 0; + + return D.count(IKey(MRegs, NRegs)) > 0; + } + + void setDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId, + PBQPRAGraph::NodeId MId, + DisjointAllowedRegsCache &D) { + const auto *NRegs = &G.getNodeMetadata(NId).getAllowedRegs(); + const auto *MRegs = &G.getNodeMetadata(MId).getAllowedRegs(); + + assert(NRegs != MRegs && "AllowedRegs can not be disjoint with itself"); + + if (NRegs < MRegs) + D.insert(IKey(NRegs, MRegs)); + else + D.insert(IKey(MRegs, NRegs)); + } + + // Holds (Interval, CurrentSegmentID, and NodeId). The first two are required + // for the fast interference graph construction algorithm. The last is there + // to save us from looking up node ids via the VRegToNode map in the graph + // metadata. + typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId> + IntervalInfo; + + static SlotIndex getStartPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].start; + } + + static SlotIndex getEndPoint(const IntervalInfo &I) { + return std::get<0>(I)->segments[std::get<1>(I)].end; + } + + static PBQP::GraphBase::NodeId getNodeId(const IntervalInfo &I) { + return std::get<2>(I); + } + + static bool lowestStartPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + // Condition reversed because priority queue has the *highest* element at + // the front, rather than the lowest. + return getStartPoint(I1) > getStartPoint(I2); + } + + static bool lowestEndPoint(const IntervalInfo &I1, + const IntervalInfo &I2) { + SlotIndex E1 = getEndPoint(I1); + SlotIndex E2 = getEndPoint(I2); + + if (E1 < E2) + return true; + + if (E1 > E2) + return false; + + // If two intervals end at the same point, we need a way to break the tie or + // the set will assume they're actually equal and refuse to insert a + // "duplicate". Just compare the vregs - fast and guaranteed unique. + return std::get<0>(I1)->reg < std::get<0>(I2)->reg; + } + + static bool isAtLastSegment(const IntervalInfo &I) { + return std::get<1>(I) == std::get<0>(I)->size() - 1; + } + + static IntervalInfo nextSegment(const IntervalInfo &I) { + return std::make_tuple(std::get<0>(I), std::get<1>(I) + 1, std::get<2>(I)); + } + +public: + + void apply(PBQPRAGraph &G) override { + // The following is loosely based on the linear scan algorithm introduced in + // "Linear Scan Register Allocation" by Poletto and Sarkar. This version + // isn't linear, because the size of the active set isn't bound by the + // number of registers, but rather the size of the largest clique in the + // graph. Still, we expect this to be better than N^2. 
+ LiveIntervals &LIS = G.getMetadata().LIS; + + // Interferenc matrices are incredibly regular - they're only a function of + // the allowed sets, so we cache them to avoid the overhead of constructing + // and uniquing them. + IMatrixCache C; + + // Finding an edge is expensive in the worst case (O(max_clique(G))). So + // cache locally edges we have already seen. + IEdgeCache EC; + + // Cache known disjoint allowed registers pairs + DisjointAllowedRegsCache D; + + typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet; + typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>, + decltype(&lowestStartPoint)> IntervalQueue; + IntervalSet Active(lowestEndPoint); + IntervalQueue Inactive(lowestStartPoint); + + // Start by building the inactive set. + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + LiveInterval &LI = LIS.getInterval(VReg); + assert(!LI.empty() && "PBQP graph contains node for empty interval"); + Inactive.push(std::make_tuple(&LI, 0, NId)); + } + + while (!Inactive.empty()) { + // Tentatively grab the "next" interval - this choice may be overriden + // below. + IntervalInfo Cur = Inactive.top(); + + // Retire any active intervals that end before Cur starts. + IntervalSet::iterator RetireItr = Active.begin(); + while (RetireItr != Active.end() && + (getEndPoint(*RetireItr) <= getStartPoint(Cur))) { + // If this interval has subsequent segments, add the next one to the + // inactive list. + if (!isAtLastSegment(*RetireItr)) + Inactive.push(nextSegment(*RetireItr)); + + ++RetireItr; + } + Active.erase(Active.begin(), RetireItr); + + // One of the newly retired segments may actually start before the + // Cur segment, so re-grab the front of the inactive list. + Cur = Inactive.top(); + Inactive.pop(); + + // At this point we know that Cur overlaps all active intervals. Add the + // interference edges. + PBQP::GraphBase::NodeId NId = getNodeId(Cur); + for (const auto &A : Active) { + PBQP::GraphBase::NodeId MId = getNodeId(A); + + // Do not add an edge when the nodes' allowed registers do not + // intersect: there is obviously no interference. + if (haveDisjointAllowedRegs(G, NId, MId, D)) + continue; + + // Check that we haven't already added this edge + IEdgeKey EK(std::min(NId, MId), std::max(NId, MId)); + if (EC.count(EK)) + continue; + + // This is a new edge - add it to the graph. + if (!createInterferenceEdge(G, NId, MId, C)) + setDisjointAllowedRegs(G, NId, MId, D); + else + EC.insert(EK); + } + + // Finally, add Cur to the Active set. + Active.insert(Cur); + } + } + +private: + + // Create an Interference edge and add it to the graph, unless it is + // a null matrix, meaning the nodes' allowed registers do not have any + // interference. This case occurs frequently between integer and floating + // point registers for example. + // return true iff both nodes interferes. + bool createInterferenceEdge(PBQPRAGraph &G, + PBQPRAGraph::NodeId NId, PBQPRAGraph::NodeId MId, + IMatrixCache &C) { + + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); + const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs(); + const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs(); + + // Try looking the edge costs up in the IMatrixCache first. 
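// A compact standalone sketch of the sweep used by Interference::apply()
// above: segments are pulled from an "inactive" queue ordered by start point,
// and before a segment becomes active every active segment ending at or
// before its start is retired. Each segment is therefore only compared with
// segments it actually overlaps. Plain integer [Start, End) segments replace
// LiveInterval/SlotIndex, and the multi-segment re-queueing of the real code
// is omitted.
#include <cstdio>
#include <queue>
#include <set>
#include <vector>

struct Seg { int Start, End, Id; };

int main() {
  std::vector<Seg> Segs = {{0, 4, 0}, {2, 6, 1}, {5, 9, 2}};
  auto ByStart = [](const Seg &A, const Seg &B) { return A.Start > B.Start; };
  auto ByEnd   = [](const Seg &A, const Seg &B) {
    return A.End != B.End ? A.End < B.End : A.Id < B.Id;
  };
  std::priority_queue<Seg, std::vector<Seg>, decltype(ByStart)>
      Inactive(ByStart, Segs);
  std::set<Seg, decltype(ByEnd)> Active(ByEnd);

  while (!Inactive.empty()) {
    Seg Cur = Inactive.top();
    Inactive.pop();
    // Retire active segments that end at or before Cur's start point.
    while (!Active.empty() && Active.begin()->End <= Cur.Start)
      Active.erase(Active.begin());
    for (const Seg &A : Active)   // Cur overlaps everything still active
      std::printf("interference: %d <-> %d\n", A.Id, Cur.Id);
    Active.insert(Cur);
  }
  // Prints 0 <-> 1 and 1 <-> 2; segments 0 and 2 never meet.
}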
+ IKey K(&NRegs, &MRegs); + IMatrixCache::iterator I = C.find(K); + if (I != C.end()) { + G.addEdgeBypassingCostAllocator(NId, MId, I->second); + return true; + } + + PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0); + bool NodesInterfere = false; + for (unsigned I = 0; I != NRegs.size(); ++I) { + unsigned PRegN = NRegs[I]; + for (unsigned J = 0; J != MRegs.size(); ++J) { + unsigned PRegM = MRegs[J]; + if (TRI.regsOverlap(PRegN, PRegM)) { + M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity(); + NodesInterfere = true; + } + } + } + + if (!NodesInterfere) + return false; + + PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M)); + C[K] = G.getEdgeCostsPtr(EId); + + return true; + } +}; + + +class Coalescing : public PBQPRAConstraint { +public: + void apply(PBQPRAGraph &G) override { + MachineFunction &MF = G.getMetadata().MF; + MachineBlockFrequencyInfo &MBFI = G.getMetadata().MBFI; + CoalescerPair CP(*MF.getSubtarget().getRegisterInfo()); + + // Scan the machine function and add a coalescing cost whenever CoalescerPair + // gives the Ok. + for (const auto &MBB : MF) { + for (const auto &MI : MBB) { + + // Skip not-coalescable or already coalesced copies. + if (!CP.setRegisters(&MI) || CP.getSrcReg() == CP.getDstReg()) + continue; + + unsigned DstReg = CP.getDstReg(); + unsigned SrcReg = CP.getSrcReg(); + + const float Scale = 1.0f / MBFI.getEntryFreq(); + PBQP::PBQPNum CBenefit = MBFI.getBlockFreq(&MBB).getFrequency() * Scale; + + if (CP.isPhys()) { + if (!MF.getRegInfo().isAllocatable(DstReg)) + continue; + + PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg); + + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed = + G.getNodeMetadata(NId).getAllowedRegs(); + + unsigned PRegOpt = 0; + while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg) + ++PRegOpt; + + if (PRegOpt < Allowed.size()) { + PBQPRAGraph::RawVector NewCosts(G.getNodeCosts(NId)); + NewCosts[PRegOpt + 1] -= CBenefit; + G.setNodeCosts(NId, std::move(NewCosts)); + } + } else { + PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg); + PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 = + &G.getNodeMetadata(N1Id).getAllowedRegs(); + const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 = + &G.getNodeMetadata(N2Id).getAllowedRegs(); + + PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id); + if (EId == G.invalidEdgeId()) { + PBQPRAGraph::RawMatrix Costs(Allowed1->size() + 1, + Allowed2->size() + 1, 0); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.addEdge(N1Id, N2Id, std::move(Costs)); + } else { + if (G.getEdgeNode1Id(EId) == N2Id) { + std::swap(N1Id, N2Id); + std::swap(Allowed1, Allowed2); + } + PBQPRAGraph::RawMatrix Costs(G.getEdgeCosts(EId)); + addVirtRegCoalesce(Costs, *Allowed1, *Allowed2, CBenefit); + G.updateEdgeCosts(EId, std::move(Costs)); + } + } + } + } + } + +private: + + void addVirtRegCoalesce( + PBQPRAGraph::RawMatrix &CostMat, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1, + const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2, + PBQP::PBQPNum Benefit) { + assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch."); + assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch."); + for (unsigned I = 0; I != Allowed1.size(); ++I) { + unsigned PReg1 = Allowed1[I]; + for (unsigned J = 0; J != Allowed2.size(); ++J) { + unsigned PReg2 = Allowed2[J]; + if (PReg1 == PReg2) + CostMat[I + 1][J + 1] -= 
Benefit; + } + } + } + +}; + +} // End anonymous namespace. + +// Out-of-line destructor/anchor for PBQPRAConstraint. +PBQPRAConstraint::~PBQPRAConstraint() {} +void PBQPRAConstraint::anchor() {} +void PBQPRAConstraintList::anchor() {} + +void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesCFG(); + au.addRequired<AAResultsWrapperPass>(); + au.addPreserved<AAResultsWrapperPass>(); + au.addRequired<SlotIndexes>(); + au.addPreserved<SlotIndexes>(); + au.addRequired<LiveIntervals>(); + au.addPreserved<LiveIntervals>(); + //au.addRequiredID(SplitCriticalEdgesID); + if (customPassID) + au.addRequiredID(*customPassID); + au.addRequired<LiveStacks>(); + au.addPreserved<LiveStacks>(); + au.addRequired<MachineBlockFrequencyInfo>(); + au.addPreserved<MachineBlockFrequencyInfo>(); + au.addRequired<MachineLoopInfo>(); + au.addPreserved<MachineLoopInfo>(); + au.addRequired<MachineDominatorTree>(); + au.addPreserved<MachineDominatorTree>(); + au.addRequired<VirtRegMap>(); + au.addPreserved<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(au); +} + +void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF, + LiveIntervals &LIS) { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Iterate over all live ranges. + for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(I); + if (MRI.reg_nodbg_empty(Reg)) + continue; + LiveInterval &LI = LIS.getInterval(Reg); + + // If this live interval is non-empty we will use pbqp to allocate it. + // Empty intervals we allocate in a simple post-processing stage in + // finalizeAlloc. + if (!LI.empty()) { + VRegsToAlloc.insert(LI.reg); + } else { + EmptyIntervalVRegs.insert(LI.reg); + } + } +} + +static bool isACalleeSavedRegister(unsigned reg, const TargetRegisterInfo &TRI, + const MachineFunction &MF) { + const MCPhysReg *CSR = TRI.getCalleeSavedRegs(&MF); + for (unsigned i = 0; CSR[i] != 0; ++i) + if (TRI.regsOverlap(reg, CSR[i])) + return true; + return false; +} + +void RegAllocPBQP::initializeGraph(PBQPRAGraph &G, VirtRegMap &VRM, + Spiller &VRegSpiller) { + MachineFunction &MF = G.getMetadata().MF; + + LiveIntervals &LIS = G.getMetadata().LIS; + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo &TRI = + *G.getMetadata().MF.getSubtarget().getRegisterInfo(); + + std::vector<unsigned> Worklist(VRegsToAlloc.begin(), VRegsToAlloc.end()); + + while (!Worklist.empty()) { + unsigned VReg = Worklist.back(); + Worklist.pop_back(); + + const TargetRegisterClass *TRC = MRI.getRegClass(VReg); + LiveInterval &VRegLI = LIS.getInterval(VReg); + + // Record any overlaps with regmask operands. + BitVector RegMaskOverlaps; + LIS.checkRegMaskInterference(VRegLI, RegMaskOverlaps); + + // Compute an initial allowed set for the current vreg. + std::vector<unsigned> VRegAllowed; + ArrayRef<MCPhysReg> RawPRegOrder = TRC->getRawAllocationOrder(MF); + for (unsigned I = 0; I != RawPRegOrder.size(); ++I) { + unsigned PReg = RawPRegOrder[I]; + if (MRI.isReserved(PReg)) + continue; + + // vregLI crosses a regmask operand that clobbers preg. + if (!RegMaskOverlaps.empty() && !RegMaskOverlaps.test(PReg)) + continue; + + // vregLI overlaps fixed regunit interference. + bool Interference = false; + for (MCRegUnitIterator Units(PReg, &TRI); Units.isValid(); ++Units) { + if (VRegLI.overlaps(LIS.getRegUnit(*Units))) { + Interference = true; + break; + } + } + if (Interference) + continue; + + // preg is usable for this virtual register. 
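// A standalone model of the interference matrix built by
// createInterferenceEdge() earlier in this file: option 0 on each node is
// "spill", options 1..K map to the node's allowed physical registers, and an
// entry is +infinity exactly when that pair of assignments uses overlapping
// registers. The overlap relation and register numbers below are invented
// stand-ins for TRI.regsOverlap() and real allowed sets.
#include <cstdio>
#include <limits>
#include <vector>

int main() {
  // Pretend registers 1 and 2 alias each other (as a subregister pair might)
  // while register 3 is independent of both.
  auto RegsOverlap = [](unsigned A, unsigned B) {
    return A == B || (A <= 2 && B <= 2);
  };
  std::vector<unsigned> NRegs = {1, 3};   // allowed set of node N
  std::vector<unsigned> MRegs = {2, 3};   // allowed set of node M
  const double Inf = std::numeric_limits<double>::infinity();

  std::vector<std::vector<double>> M(NRegs.size() + 1,
                                     std::vector<double>(MRegs.size() + 1, 0));
  bool NodesInterfere = false;
  for (unsigned I = 0; I != NRegs.size(); ++I)
    for (unsigned J = 0; J != MRegs.size(); ++J)
      if (RegsOverlap(NRegs[I], MRegs[J])) {
        M[I + 1][J + 1] = Inf;   // forbid this pair of assignments
        NodesInterfere = true;
      }

  // An edge is only added when at least one pair interferes; an all-zero
  // matrix carries no information.
  std::printf("interfere: %d\n", NodesInterfere);
  for (auto &Row : M) {
    for (double C : Row) std::printf("%4g ", C);
    std::printf("\n");
  }
}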
+ VRegAllowed.push_back(PReg); + } + + // Check for vregs that have no allowed registers. These should be + // pre-spilled and the new vregs added to the worklist. + if (VRegAllowed.empty()) { + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + Worklist.insert(Worklist.end(), NewVRegs.begin(), NewVRegs.end()); + continue; + } + + PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0); + + // Tweak cost of callee saved registers, as using then force spilling and + // restoring them. This would only happen in the prologue / epilogue though. + for (unsigned i = 0; i != VRegAllowed.size(); ++i) + if (isACalleeSavedRegister(VRegAllowed[i], TRI, MF)) + NodeCosts[1 + i] += 1.0; + + PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts)); + G.getNodeMetadata(NId).setVReg(VReg); + G.getNodeMetadata(NId).setAllowedRegs( + G.getMetadata().getAllowedRegs(std::move(VRegAllowed))); + G.getMetadata().setNodeIdForVReg(VReg, NId); + } +} + +void RegAllocPBQP::spillVReg(unsigned VReg, + SmallVectorImpl<unsigned> &NewIntervals, + MachineFunction &MF, LiveIntervals &LIS, + VirtRegMap &VRM, Spiller &VRegSpiller) { + + VRegsToAlloc.erase(VReg); + LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM); + VRegSpiller.spill(LRE); + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + (void)TRI; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> SPILLED (Cost: " + << LRE.getParent().weight << ", New vregs: "); + + // Copy any newly inserted live intervals into the list of regs to + // allocate. + for (LiveRangeEdit::iterator I = LRE.begin(), E = LRE.end(); + I != E; ++I) { + const LiveInterval &LI = LIS.getInterval(*I); + assert(!LI.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(LI.reg, &TRI) << " "); + VRegsToAlloc.insert(LI.reg); + } + + DEBUG(dbgs() << ")\n"); +} + +bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G, + const PBQP::Solution &Solution, + VirtRegMap &VRM, + Spiller &VRegSpiller) { + MachineFunction &MF = G.getMetadata().MF; + LiveIntervals &LIS = G.getMetadata().LIS; + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + (void)TRI; + + // Set to true if we have any spills + bool AnotherRoundNeeded = false; + + // Clear the existing allocation. + VRM.clearAllVirt(); + + // Iterate over the nodes mapping the PBQP solution to a register + // assignment. + for (auto NId : G.nodeIds()) { + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + unsigned AllocOption = Solution.getSelection(NId); + + if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) { + unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1]; + DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> " + << TRI.getName(PReg) << "\n"); + assert(PReg != 0 && "Invalid preg selected."); + VRM.assignVirt2Phys(VReg, PReg); + } else { + // Spill VReg. If this introduces new intervals we'll need another round + // of allocation. + SmallVector<unsigned, 8> NewVRegs; + spillVReg(VReg, NewVRegs, MF, LIS, VRM, VRegSpiller); + AnotherRoundNeeded |= !NewVRegs.empty(); + } + } + + return !AnotherRoundNeeded; +} + +void RegAllocPBQP::finalizeAlloc(MachineFunction &MF, + LiveIntervals &LIS, + VirtRegMap &VRM) const { + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // First allocate registers for the empty intervals. 
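// A standalone sketch of how initializeGraph() above computes the allowed
// register set for one vreg: walk the raw allocation order and drop anything
// reserved, clobbered by a regmask the interval crosses, or already occupied
// by a fixed regunit live range. The predicates are placeholders for the
// MRI/LIS queries in the real code, and the register numbers are invented.
#include <cstdio>
#include <functional>
#include <vector>

static std::vector<unsigned>
computeAllowed(const std::vector<unsigned> &RawOrder,
               std::function<bool(unsigned)> IsReserved,
               std::function<bool(unsigned)> ClobberedByRegMask,
               std::function<bool(unsigned)> HasFixedInterference) {
  std::vector<unsigned> Allowed;
  for (unsigned PReg : RawOrder) {
    if (IsReserved(PReg)) continue;
    if (ClobberedByRegMask(PReg)) continue;
    if (HasFixedInterference(PReg)) continue;
    Allowed.push_back(PReg);
  }
  return Allowed;
}

int main() {
  std::vector<unsigned> RawOrder = {1, 2, 3, 4};
  auto Allowed = computeAllowed(
      RawOrder,
      [](unsigned R) { return R == 1; },  // r1 is reserved (say, the SP)
      [](unsigned R) { return R == 3; },  // r3 is clobbered by a call regmask
      [](unsigned)   { return false; });  // no fixed interference
  for (unsigned R : Allowed) std::printf("r%u ", R);  // prints: r2 r4
  std::printf("\n");
  // If Allowed ended up empty, the vreg would be pre-spilled instead.
}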
+ for (RegSet::const_iterator + I = EmptyIntervalVRegs.begin(), E = EmptyIntervalVRegs.end(); + I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + + unsigned PReg = MRI.getSimpleHint(LI.reg); + + if (PReg == 0) { + const TargetRegisterClass &RC = *MRI.getRegClass(LI.reg); + PReg = RC.getRawAllocationOrder(MF).front(); + } + + VRM.assignVirt2Phys(LI.reg, PReg); + } +} + +static inline float normalizePBQPSpillWeight(float UseDefFreq, unsigned Size, + unsigned NumInstr) { + // All intervals have a spill weight that is mostly proportional to the number + // of uses, with uses in loops having a bigger weight. + return NumInstr * normalizeSpillWeight(UseDefFreq, Size, 1); +} + +bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { + LiveIntervals &LIS = getAnalysis<LiveIntervals>(); + MachineBlockFrequencyInfo &MBFI = + getAnalysis<MachineBlockFrequencyInfo>(); + + VirtRegMap &VRM = getAnalysis<VirtRegMap>(); + + calculateSpillWeightsAndHints(LIS, MF, &VRM, getAnalysis<MachineLoopInfo>(), + MBFI, normalizePBQPSpillWeight); + + std::unique_ptr<Spiller> VRegSpiller(createInlineSpiller(*this, MF, VRM)); + + MF.getRegInfo().freezeReservedRegs(MF); + + DEBUG(dbgs() << "PBQP Register Allocating for " << MF.getName() << "\n"); + + // Allocator main loop: + // + // * Map current regalloc problem to a PBQP problem + // * Solve the PBQP problem + // * Map the solution back to a register allocation + // * Spill if necessary + // + // This process is continued till no more spills are generated. + + // Find the vreg intervals in need of allocation. + findVRegIntervalsToAlloc(MF, LIS); + +#ifndef NDEBUG + const Function &F = *MF.getFunction(); + std::string FullyQualifiedName = + F.getParent()->getModuleIdentifier() + "." + F.getName().str(); +#endif + + // If there are non-empty intervals allocate them using pbqp. + if (!VRegsToAlloc.empty()) { + + const TargetSubtargetInfo &Subtarget = MF.getSubtarget(); + std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot = + llvm::make_unique<PBQPRAConstraintList>(); + ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>()); + ConstraintsRoot->addConstraint(llvm::make_unique<Interference>()); + if (PBQPCoalescing) + ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>()); + ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints()); + + bool PBQPAllocComplete = false; + unsigned Round = 0; + + while (!PBQPAllocComplete) { + DEBUG(dbgs() << " PBQP Regalloc round " << Round << ":\n"); + + PBQPRAGraph G(PBQPRAGraph::GraphMetadata(MF, LIS, MBFI)); + initializeGraph(G, VRM, *VRegSpiller); + ConstraintsRoot->apply(G); + +#ifndef NDEBUG + if (PBQPDumpGraphs) { + std::ostringstream RS; + RS << Round; + std::string GraphFileName = FullyQualifiedName + "." + RS.str() + + ".pbqpgraph"; + std::error_code EC; + raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text); + DEBUG(dbgs() << "Dumping graph for round " << Round << " to \"" + << GraphFileName << "\"\n"); + G.dump(OS); + } +#endif + + PBQP::Solution Solution = PBQP::RegAlloc::solve(G); + PBQPAllocComplete = mapPBQPToRegAlloc(G, Solution, VRM, *VRegSpiller); + ++Round; + } + } + + // Finalise allocation, allocate empty ranges. + finalizeAlloc(MF, LIS, VRM); + VRegsToAlloc.clear(); + EmptyIntervalVRegs.clear(); + + DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << VRM << "\n"); + + return true; +} + +/// Create Printable object for node and register info. 
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { + const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + unsigned VReg = G.getNodeMetadata(NId).getVReg(); + const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); + OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; + }); +} + +void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { + for (auto NId : nodeIds()) { + const Vector &Costs = getNodeCosts(NId); + assert(Costs.getLength() != 0 && "Empty vector in graph."); + OS << PrintNodeInfo(NId, *this) << ": " << Costs << '\n'; + } + OS << '\n'; + + for (auto EId : edgeIds()) { + NodeId N1Id = getEdgeNode1Id(EId); + NodeId N2Id = getEdgeNode2Id(EId); + assert(N1Id != N2Id && "PBQP graphs should not have self-edges."); + const Matrix &M = getEdgeCosts(EId); + assert(M.getRows() != 0 && "No rows in matrix."); + assert(M.getCols() != 0 && "No cols in matrix."); + OS << PrintNodeInfo(N1Id, *this) << ' ' << M.getRows() << " rows / "; + OS << PrintNodeInfo(N2Id, *this) << ' ' << M.getCols() << " cols:\n"; + OS << M << '\n'; + } +} + +void PBQP::RegAlloc::PBQPRAGraph::dump() const { dump(dbgs()); } + +void PBQP::RegAlloc::PBQPRAGraph::printDot(raw_ostream &OS) const { + OS << "graph {\n"; + for (auto NId : nodeIds()) { + OS << " node" << NId << " [ label=\"" + << PrintNodeInfo(NId, *this) << "\\n" + << getNodeCosts(NId) << "\" ]\n"; + } + + OS << " edge [ len=" << nodeIds().size() << " ]\n"; + for (auto EId : edgeIds()) { + OS << " node" << getEdgeNode1Id(EId) + << " -- node" << getEdgeNode2Id(EId) + << " [ label=\""; + const Matrix &EdgeCosts = getEdgeCosts(EId); + for (unsigned i = 0; i < EdgeCosts.getRows(); ++i) { + OS << EdgeCosts.getRowAsVector(i) << "\\n"; + } + OS << "\" ]\n"; + } + OS << "}\n"; +} + +FunctionPass *llvm::createPBQPRegisterAllocator(char *customPassID) { + return new RegAllocPBQP(customPassID); +} + +FunctionPass* llvm::createDefaultPBQPRegisterAllocator() { + return createPBQPRegisterAllocator(); +} + +#undef DEBUG_TYPE diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp new file mode 100644 index 0000000..178fa18 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -0,0 +1,181 @@ +//===-- RegisterClassInfo.cpp - Dynamic Register Class Info ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterClassInfo class which provides dynamic +// information about target register classes. Callee-saved vs. caller-saved and +// reserved registers depend on calling conventions and other dynamic +// information, so some things cannot be determined statically. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +static cl::opt<unsigned> +StressRA("stress-regalloc", cl::Hidden, cl::init(0), cl::value_desc("N"), + cl::desc("Limit all regclasses to N registers")); + +RegisterClassInfo::RegisterClassInfo() + : Tag(0), MF(nullptr), TRI(nullptr), CalleeSaved(nullptr) {} + +void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { + bool Update = false; + MF = &mf; + + // Allocate new array the first time we see a new target. + if (MF->getSubtarget().getRegisterInfo() != TRI) { + TRI = MF->getSubtarget().getRegisterInfo(); + RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); + Update = true; + } + + // Does this MF have different CSRs? + assert(TRI && "no register info set"); + const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); + if (Update || CSR != CalleeSaved) { + // Build a CSRNum map. Every CSR alias gets an entry pointing to the last + // overlapping CSR. + CSRNum.clear(); + CSRNum.resize(TRI->getNumRegs(), 0); + for (unsigned N = 0; unsigned Reg = CSR[N]; ++N) + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + CSRNum[*AI] = N + 1; // 0 means no CSR, 1 means CalleeSaved[0], ... + Update = true; + } + CalleeSaved = CSR; + + // Different reserved registers? + const BitVector &RR = MF->getRegInfo().getReservedRegs(); + if (Reserved.size() != RR.size() || RR != Reserved) { + Update = true; + Reserved = RR; + } + + // Invalidate cached information from previous function. + if (Update) + ++Tag; +} + +/// compute - Compute the preferred allocation order for RC with reserved +/// registers filtered out. Volatile registers come first followed by CSR +/// aliases ordered according to the CSR order specified by the target. +void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { + assert(RC && "no register class given"); + RCInfo &RCI = RegClass[RC->getID()]; + + // Raw register count, including all reserved regs. + unsigned NumRegs = RC->getNumRegs(); + + if (!RCI.Order) + RCI.Order.reset(new MCPhysReg[NumRegs]); + + unsigned N = 0; + SmallVector<MCPhysReg, 16> CSRAlias; + unsigned MinCost = 0xff; + unsigned LastCost = ~0u; + unsigned LastCostChange = 0; + + // FIXME: Once targets reserve registers instead of removing them from the + // allocation order, we can simply use begin/end here. + ArrayRef<MCPhysReg> RawOrder = RC->getRawAllocationOrder(*MF); + for (unsigned i = 0; i != RawOrder.size(); ++i) { + unsigned PhysReg = RawOrder[i]; + // Remove reserved registers from the allocation order. + if (Reserved.test(PhysReg)) + continue; + unsigned Cost = TRI->getCostPerUse(PhysReg); + MinCost = std::min(MinCost, Cost); + + if (CSRNum[PhysReg]) + // PhysReg aliases a CSR, save it for later. + CSRAlias.push_back(PhysReg); + else { + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } + } + RCI.NumRegs = N + CSRAlias.size(); + assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); + + // CSR aliases go after the volatile registers, preserve the target's order. 
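// A small model of the CSRNum map built in runOnMachineFunction() above:
// every register that aliases a callee-saved register gets the 1-based index
// of the last overlapping CSR, and 0 means "does not alias any CSR". The
// alias relation and register numbers are made up (register 2 aliases
// register 3, as a subregister might).
#include <cstdio>
#include <vector>

int main() {
  const unsigned NumRegs = 6;
  const unsigned CSR[] = {3, 5, 0};           // null-terminated, like getCalleeSavedRegs()
  auto Aliases = [](unsigned A, unsigned B) { // toy overlap relation
    return A == B || (A == 2 && B == 3) || (A == 3 && B == 2);
  };

  std::vector<unsigned> CSRNum(NumRegs, 0);
  for (unsigned N = 0; CSR[N] != 0; ++N)
    for (unsigned R = 1; R != NumRegs; ++R)
      if (Aliases(R, CSR[N]))
        CSRNum[R] = N + 1;  // 1 == CalleeSaved[0], 2 == CalleeSaved[1], ...

  for (unsigned R = 1; R != NumRegs; ++R)
    std::printf("reg %u -> CSRNum %u\n", R, CSRNum[R]);
  // reg 2 and reg 3 map to 1, reg 5 maps to 2, everything else to 0.
}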
+ for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { + unsigned PhysReg = CSRAlias[i]; + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } + + // Register allocator stress test. Clip register class to N registers. + if (StressRA && RCI.NumRegs > StressRA) + RCI.NumRegs = StressRA; + + // Check if RC is a proper sub-class. + if (const TargetRegisterClass *Super = + TRI->getLargestLegalSuperClass(RC, *MF)) + if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) + RCI.ProperSubClass = true; + + RCI.MinCost = uint8_t(MinCost); + RCI.LastCostChange = LastCostChange; + + DEBUG({ + dbgs() << "AllocationOrder(" << TRI->getRegClassName(RC) << ") = ["; + for (unsigned I = 0; I != RCI.NumRegs; ++I) + dbgs() << ' ' << PrintReg(RCI.Order[I], TRI); + dbgs() << (RCI.ProperSubClass ? " ] (sub-class)\n" : " ]\n"); + }); + + // RCI is now up-to-date. + RCI.Tag = Tag; +} + +/// This is not accurate because two overlapping register sets may have some +/// nonoverlapping reserved registers. However, computing the allocation order +/// for all register classes would be too expensive. +unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { + const TargetRegisterClass *RC = nullptr; + unsigned NumRCUnits = 0; + for (TargetRegisterInfo::regclass_iterator + RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { + const int *PSetID = TRI->getRegClassPressureSets(*RI); + for (; *PSetID != -1; ++PSetID) { + if ((unsigned)*PSetID == Idx) + break; + } + if (*PSetID == -1) + continue; + + // Found a register class that counts against this pressure set. + // For efficiency, only compute the set order for the largest set. + unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit; + if (!RC || NUnits > NumRCUnits) { + RC = *RI; + NumRCUnits = NUnits; + } + } + compute(RC); + unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); + return TRI->getRegPressureSetLimit(*MF, Idx) - + TRI->getRegClassWeight(RC).RegWeight * NReserved; +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp new file mode 100644 index 0000000..e7b3217 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -0,0 +1,2985 @@ +//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the generic RegisterCoalescer interface which +// is used as the common interface used by all clients and +// implementations of register coalescing. 
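// The pressure-set limit returned by computePSetLimit() above is "raw target
// limit minus the weight of the reserved registers of the chosen register
// class". A worked example with invented numbers; real weights and limits
// come from TargetRegisterInfo.
#include <iostream>

int main() {
  unsigned RawLimit    = 32; // TRI->getRegPressureSetLimit(MF, Idx)
  unsigned ClassRegs   = 16; // RC->getNumRegs()
  unsigned Allocatable = 14; // getNumAllocatableRegs(RC): reserved regs removed
  unsigned RegWeight   = 2;  // TRI->getRegClassWeight(RC).RegWeight
  unsigned NumReserved = ClassRegs - Allocatable;            // 2
  unsigned EffectiveLimit = RawLimit - RegWeight * NumReserved; // 32 - 4 = 28
  std::cout << EffectiveLimit << '\n';
}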
+// +//===----------------------------------------------------------------------===// + +#include "RegisterCoalescer.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cmath> +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(numJoins , "Number of interval joins performed"); +STATISTIC(numCrossRCs , "Number of cross class joins performed"); +STATISTIC(numCommutes , "Number of instruction commuting performed"); +STATISTIC(numExtends , "Number of copies extended"); +STATISTIC(NumReMats , "Number of instructions re-materialized"); +STATISTIC(NumInflated , "Number of register classes inflated"); +STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); +STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); + +static cl::opt<bool> +EnableJoining("join-liveintervals", + cl::desc("Coalesce copies (default=true)"), + cl::init(true)); + +static cl::opt<bool> UseTerminalRule("terminal-rule", + cl::desc("Apply the terminal rule"), + cl::init(false), cl::Hidden); + +/// Temporary flag to test critical edge unsplitting. +static cl::opt<bool> +EnableJoinSplits("join-splitedges", + cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); + +/// Temporary flag to test global copy optimization. +static cl::opt<cl::boolOrDefault> +EnableGlobalCopies("join-globalcopies", + cl::desc("Coalesce copies that span blocks (default=subtarget)"), + cl::init(cl::BOU_UNSET), cl::Hidden); + +static cl::opt<bool> +VerifyCoalescing("verify-coalescing", + cl::desc("Verify machine instrs before and after register coalescing"), + cl::Hidden); + +namespace { + class RegisterCoalescer : public MachineFunctionPass, + private LiveRangeEdit::Delegate { + MachineFunction* MF; + MachineRegisterInfo* MRI; + const TargetMachine* TM; + const TargetRegisterInfo* TRI; + const TargetInstrInfo* TII; + LiveIntervals *LIS; + const MachineLoopInfo* Loops; + AliasAnalysis *AA; + RegisterClassInfo RegClassInfo; + + /// A LaneMask to remember on which subregister live ranges we need to call + /// shrinkToUses() later. + LaneBitmask ShrinkMask; + + /// True if the main range of the currently coalesced intervals should be + /// checked for smaller live intervals. + bool ShrinkMainRange; + + /// \brief True if the coalescer should aggressively coalesce global copies + /// in favor of keeping local copies. + bool JoinGlobalCopies; + + /// \brief True if the coalescer should aggressively coalesce fall-thru + /// blocks exclusively containing copies. + bool JoinSplitEdges; + + /// Copy instructions yet to be coalesced. 
+ SmallVector<MachineInstr*, 8> WorkList; + SmallVector<MachineInstr*, 8> LocalWorkList; + + /// Set of instruction pointers that have been erased, and + /// that may be present in WorkList. + SmallPtrSet<MachineInstr*, 8> ErasedInstrs; + + /// Dead instructions that are about to be deleted. + SmallVector<MachineInstr*, 8> DeadDefs; + + /// Virtual registers to be considered for register class inflation. + SmallVector<unsigned, 8> InflateRegs; + + /// Recursively eliminate dead defs in DeadDefs. + void eliminateDeadDefs(); + + /// LiveRangeEdit callback for eliminateDeadDefs(). + void LRE_WillEraseInstruction(MachineInstr *MI) override; + + /// Coalesce the LocalWorkList. + void coalesceLocals(); + + /// Join compatible live intervals + void joinAllIntervals(); + + /// Coalesce copies in the specified MBB, putting + /// copies that cannot yet be coalesced into WorkList. + void copyCoalesceInMBB(MachineBasicBlock *MBB); + + /// Tries to coalesce all copies in CurrList. Returns true if any progress + /// was made. + bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); + + /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the + /// src/dst of the copy instruction CopyMI. This returns true if the copy + /// was successfully coalesced away. If it is not currently possible to + /// coalesce this interval, but it may be possible if other things get + /// coalesced, then it returns true by reference in 'Again'. + bool joinCopy(MachineInstr *TheCopy, bool &Again); + + /// Attempt to join these two intervals. On failure, this + /// returns false. The output "SrcInt" will not have been modified, so we + /// can use this information below to update aliases. + bool joinIntervals(CoalescerPair &CP); + + /// Attempt joining two virtual registers. Return true on success. + bool joinVirtRegs(CoalescerPair &CP); + + /// Attempt joining with a reserved physreg. + bool joinReservedPhysReg(CoalescerPair &CP); + + /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. + /// Subranges in @p LI which only partially interfere with the desired + /// LaneMask are split as necessary. @p LaneMask are the lanes that + /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange + /// lanemasks already adjusted to the coalesced register. + void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, + LaneBitmask LaneMask, CoalescerPair &CP); + + /// Join the liveranges of two subregisters. Joins @p RRange into + /// @p LRange, @p RRange may be invalid afterwards. + void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, const CoalescerPair &CP); + + /// We found a non-trivially-coalescable copy. If the source value number is + /// defined by a copy from the destination reg see if we can merge these two + /// destination reg valno# into a single value number, eliminating a copy. + /// This returns true if an interval was modified. + bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); + + /// Return true if there are definitions of IntB + /// other than BValNo val# that can reach uses of AValno val# of IntA. + bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, + VNInfo *AValNo, VNInfo *BValNo); + + /// We found a non-trivially-coalescable copy. + /// If the source value number is defined by a commutable instruction and + /// its other operand is coalesced to the copy dest register, see if we + /// can transform the copy into a noop by commuting the definition. 
+    /// This returns true if an interval was modified.
+    bool removeCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI);
+
+    /// If the source of a copy is defined by a trivial computation, replace
+    /// the copy by rematerializing the definition.
+    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
+                                 bool &IsDefCopy);
+
+    /// Return true if a copy involving a physreg should be joined.
+    bool canJoinPhys(const CoalescerPair &CP);
+
+    /// Replace all defs and uses of SrcReg with DstReg and update the
+    /// subregister number if it is not zero. If DstReg is a physical register
+    /// and the existing subregister number of the def / use being updated is
+    /// not zero, make sure to set it to the correct physical subregister.
+    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
+
+    /// Handle copies of undef values.
+    /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
+    bool eliminateUndefCopy(MachineInstr *CopyMI);
+
+    /// Check whether or not we should apply the terminal rule on the
+    /// destination (Dst) of \p Copy.
+    /// When the terminal rule applies, Copy is not profitable to coalesce.
+    /// Dst is terminal if it has exactly one affinity (Dst, Src) and at least
+    /// one interference (Dst, Dst2). If Dst is terminal, the terminal rule
+    /// consists of checking that at least one interfering node, say Dst2, has
+    /// an affinity of equal or greater weight with Src.
+    /// In that case, Dst2 and Dst cannot both be coalesced with Src. Since
+    /// Dst2 exposes more coalescing opportunities than Dst, we can drop
+    /// \p Copy.
+    bool applyTerminalRule(const MachineInstr &Copy) const;
+
+    /// Wrapper method for \see LiveIntervals::shrinkToUses.
+    /// This method does the proper fixing of the live-ranges when the
+    /// aforementioned method returns true.
+    void shrinkToUses(LiveInterval *LI,
+                      SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
+      if (LIS->shrinkToUses(LI, Dead)) {
+        /// Check whether or not \p LI is composed of multiple connected
+        /// components and if that is the case, fix that.
+        SmallVector<LiveInterval*, 8> SplitLIs;
+        LIS->splitSeparateComponents(*LI, SplitLIs);
+      }
+    }
+
+  public:
+    static char ID; ///< Class identification, replacement for typeinfo
+    RegisterCoalescer() : MachineFunctionPass(ID) {
+      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+    }
+
+    void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+    void releaseMemory() override;
+
+    /// This is the pass entry point.
+    bool runOnMachineFunction(MachineFunction&) override;
+
+    /// Implement the dump method.
+ void print(raw_ostream &O, const Module* = nullptr) const override; + }; +} // end anonymous namespace + +char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; + +INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", + "Simple Register Coalescing", false, false) + +char RegisterCoalescer::ID = 0; + +static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), + MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else + return false; + return true; +} + +/// Return true if this block should be vacated by the coalescer to eliminate +/// branches. The important cases to handle in the coalescer are critical edges +/// split during phi elimination which contain only copies. Simple blocks that +/// contain non-branches should also be vacated, but this can be handled by an +/// earlier pass similar to early if-conversion. +static bool isSplitEdge(const MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + for (const auto &MI : *MBB) { + if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + SrcReg = DstReg = 0; + SrcIdx = DstIdx = 0; + NewRC = nullptr; + Flipped = CrossClass = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) + return false; + Partial = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + Flipped = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = TRI.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // Copies between different sub-registers are never coalescable. + if (Src == Dst && SrcSub != DstSub) + return false; + + NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, + SrcIdx, DstIdx); + if (!NewRC) + return false; + } else if (DstSub) { + // SrcReg will be merged with a sub-register of DstReg. 
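// A toy version of the normalization at the start of
// CoalescerPair::setRegisters() above: the pair is flipped so that, if one
// side is a physical register, it is always the destination, and phys-to-phys
// copies are rejected outright. Register "numbers" and the IsPhys convention
// below are arbitrary stand-ins for TargetRegisterInfo::isPhysicalRegister().
#include <cstdio>
#include <utility>

struct CopyEnds {
  unsigned Src, Dst;
  bool Flipped;
};

static bool normalize(CopyEnds &CP, bool (*IsPhys)(unsigned)) {
  if (IsPhys(CP.Src)) {
    if (IsPhys(CP.Dst))
      return false;             // phys-to-phys copies are never coalesced
    std::swap(CP.Src, CP.Dst);  // keep the physreg on the Dst side
    CP.Flipped = true;
  }
  return true;
}

int main() {
  auto IsPhys = [](unsigned Reg) { return Reg < 100; }; // toy convention
  CopyEnds CP{/*Src=*/5, /*Dst=*/201, /*Flipped=*/false}; // %vreg201 = COPY r5
  if (normalize(CP, IsPhys))
    std::printf("Src=%u Dst=%u Flipped=%d\n", CP.Src, CP.Dst, CP.Flipped);
  // Prints: Src=201 Dst=5 Flipped=1
}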
+ SrcIdx = DstSub; + NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + } else if (SrcSub) { + // DstReg will be merged with a sub-register of SrcReg. + DstIdx = SrcSub; + NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); + } else { + // This is a straight copy without sub-registers. + NewRC = TRI.getCommonSubClass(DstRC, SrcRC); + } + + // The combined constraint may be impossible to satisfy. + if (!NewRC) + return false; + + // Prefer SrcReg to be a sub-register of DstReg. + // FIXME: Coalescer should support subregs symmetrically. + if (DstIdx && !SrcIdx) { + std::swap(Src, Dst); + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; + } + + CrossClass = NewRC != DstRC || NewRC != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + SrcReg = Src; + DstReg = Dst; + return true; +} + +bool CoalescerPair::flip() { + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + return false; + std::swap(SrcReg, DstReg); + std::swap(SrcIdx, DstIdx); + Flipped = !Flipped; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is SrcReg. + if (Dst == SrcReg) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != SrcReg) { + return false; + } + + // Now check that Dst matches DstReg. + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = TRI.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return DstReg == Dst; + // This is a partial register copy. Check that the parts match. + return TRI.getSubReg(DstReg, SrcSub) == Dst; + } else { + // DstReg is virtual. + if (DstReg != Dst) + return false; + // Registers match, do the subregisters line up? + return TRI.composeSubRegIndices(SrcIdx, SrcSub) == + TRI.composeSubRegIndices(DstIdx, DstSub); + } +} + +void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<MachineLoopInfo>(); + AU.addPreserved<MachineLoopInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void RegisterCoalescer::eliminateDeadDefs() { + SmallVector<unsigned, 8> NewRegs; + LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, + nullptr, this).eliminateDeadDefs(DeadDefs); +} + +void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { + // MI may be in WorkList. Make sure we don't visit it. + ErasedInstrs.insert(MI); +} + +bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert(!CP.isPartial() && "This doesn't work for partial copies."); + assert(!CP.isPhys() && "This doesn't work for physreg copies."); + + LiveInterval &IntA = + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + LIS->getInterval(CP.isFlipped() ? 
CP.getSrcReg() : CP.getDstReg()); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + + // We have a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a copy from B, see if we can + // merge these two pieces of B into a single value number, eliminating a copy. + // For example: + // + // A3 = B0 + // ... + // B1 = A3 <- this copy + // + // In this case, B0 can be extended to where the B1 copy lives, allowing the + // B1 value number to be replaced with B0 (which simplifies the B + // liveinterval). + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above. + LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); + if (BS == IntB.end()) return false; + VNInfo *BValNo = BS->valno; + + // Get the location that B is defined at. Two options: either this value has + // an unknown definition point or it is defined at CopyIdx. If unknown, we + // can't process it. + if (BValNo->def != CopyIdx) return false; + + // AValNo is the value number in A that defines the copy, A3 in the example. + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); + LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); + // The live segment might not exist after fun with physreg coalescing. + if (AS == IntA.end()) return false; + VNInfo *AValNo = AS->valno; + + // If AValNo is defined as a copy from IntB, we can potentially process this. + // Get the instruction that defines this value number. + MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); + // Don't allow any partial copies, even if isCoalescable() allows them. + if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) + return false; + + // Get the Segment in IntB that this value number starts with. + LiveInterval::iterator ValS = + IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); + if (ValS == IntB.end()) + return false; + + // Make sure that the end of the live segment is inside the same block as + // CopyMI. + MachineInstr *ValSEndInst = + LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); + if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) + return false; + + // Okay, we now know that ValS ends in the same block that the CopyMI + // live-range starts. If there are no intervening live segments between them + // in IntB, we can merge them. + if (ValS+1 != BS) return false; + + DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); + + SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; + // We are about to delete CopyMI, so need to remove it as the 'instruction + // that defines this value #'. Update the valnum with the new defining + // instruction #. + BValNo->def = FillerStart; + + // Okay, we can merge them. We need to insert a new liverange: + // [ValS.end, BS.begin) of either value number, then we merge the + // two value numbers. + IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); + + // Okay, merge "B1" into the same value number as "B0". + if (BValNo != ValS->valno) + IntB.MergeValueNumberInto(BValNo, ValS->valno); + + // Do the same for the subregister segments. 
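+  // When subregister liveness is tracked, each lane of IntB has its own
+  // subrange; every subrange needs the same filler segment and value number
+  // merge that was just performed on the main range.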
+ for (LiveInterval::SubRange &S : IntB.subranges()) { + VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); + S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); + VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); + if (SubBValNo != SubValSNo) + S.MergeValueNumberInto(SubBValNo, SubValSNo); + } + + DEBUG(dbgs() << " result = " << IntB << '\n'); + + // If the source instruction was killing the source register before the + // merge, unset the isKill marker given the live range has been extended. + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); + if (UIdx != -1) { + ValSEndInst->getOperand(UIdx).setIsKill(false); + } + + // Rewrite the copy. If the copy instruction was killing the destination + // register before the merge, find the last use and trim the live range. That + // will also add the isKill marker. + CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); + if (AS->end == CopyIdx) + shrinkToUses(&IntA); + + ++numExtends; + return true; +} + +bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, + LiveInterval &IntB, + VNInfo *AValNo, + VNInfo *BValNo) { + // If AValNo has PHI kills, conservatively assume that IntB defs can reach + // the PHI values. + if (LIS->hasPHIKill(IntA, AValNo)) + return true; + + for (LiveRange::Segment &ASeg : IntA.segments) { + if (ASeg.valno != AValNo) continue; + LiveInterval::iterator BI = + std::upper_bound(IntB.begin(), IntB.end(), ASeg.start); + if (BI != IntB.begin()) + --BI; + for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { + if (BI->valno == BValNo) + continue; + if (BI->start <= ASeg.start && BI->end > ASeg.start) + return true; + if (BI->start > ASeg.start && BI->start < ASeg.end) + return true; + } + } + return false; +} + +/// Copy segements with value number @p SrcValNo from liverange @p Src to live +/// range @Dst and use value number @p DstValNo there. +static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, + const LiveRange &Src, const VNInfo *SrcValNo) +{ + for (const LiveRange::Segment &S : Src.segments) { + if (S.valno != SrcValNo) + continue; + Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); + } +} + +bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, + MachineInstr *CopyMI) { + assert(!CP.isPhys()); + + LiveInterval &IntA = + LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + + // We found a non-trivially-coalescable copy with IntA being the source and + // IntB being the dest, thus this defines a value number in IntB. If the + // source value number (in IntA) is defined by a commutable instruction and + // its other operand is coalesced to the copy dest register, see if we can + // transform the copy into a noop by commuting the definition. For example, + // + // A3 = op A2 B0<kill> + // ... + // B1 = A3 <- this copy + // ... + // = op A3 <- more uses + // + // ==> + // + // B2 = op B0 A2<kill> + // ... + // B1 = B2 <- now an identity copy + // ... + // = op B2 <- more uses + + // BValNo is a value number in B that is defined by a copy from A. 'B1' in + // the example above. + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); + assert(BValNo != nullptr && BValNo->def == CopyIdx); + + // AValNo is the value number in A that defines the copy, A3 in the example. 
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); + assert(AValNo && !AValNo->isUnused() && "COPY source not live"); + if (AValNo->isPHIDef()) + return false; + MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; + if (!DefMI->isCommutable()) + return false; + // If DefMI is a two-address instruction then commuting it will change the + // destination register. + int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); + assert(DefIdx != -1); + unsigned UseOpIdx; + if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) + return false; + + // FIXME: The code below tries to commute 'UseOpIdx' operand with some other + // commutable operand which is expressed by 'CommuteAnyOperandIndex'value + // passed to the method. That _other_ operand is chosen by + // the findCommutedOpIndices() method. + // + // That is obviously an area for improvement in case of instructions having + // more than 2 operands. For example, if some instruction has 3 commutable + // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, + // op#2<->op#3) of commute transformation should be considered/tried here. + unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; + if (!TII->findCommutedOpIndices(DefMI, UseOpIdx, NewDstIdx)) + return false; + + MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); + unsigned NewReg = NewDstMO.getReg(); + if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) + return false; + + // Make sure there are no other definitions of IntB that would reach the + // uses which the new definition can reach. + if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) + return false; + + // If some of the uses of IntA.reg is already coalesced away, return false. + // It's not possible to determine whether it's safe to perform the coalescing. + for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { + MachineInstr *UseMI = MO.getParent(); + unsigned OpNo = &MO - &UseMI->getOperand(0); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) + continue; + // If this use is tied to a def, we can't rewrite the register. + if (UseMI->isRegTiedToDefOperand(OpNo)) + return false; + } + + DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' + << *DefMI); + + // At this point we have decided that it is legal to do this + // transformation. Start by commuting the instruction. + MachineBasicBlock *MBB = DefMI->getParent(); + MachineInstr *NewMI = + TII->commuteInstruction(DefMI, false, UseOpIdx, NewDstIdx); + if (!NewMI) + return false; + if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && + TargetRegisterInfo::isVirtualRegister(IntB.reg) && + !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) + return false; + if (NewMI != DefMI) { + LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); + MachineBasicBlock::iterator Pos = DefMI; + MBB->insert(Pos, NewMI); + MBB->erase(DefMI); + } + + // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A<kill> + // ... + // = B + + // Update uses of IntA of the specific Val# with IntB. 
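+  // In terms of the example above: every use of A3 (the value that has just
+  // been commuted into B) is rewritten to IntB.reg, and any copy that thereby
+  // becomes an identity copy like "B1 = B2" is merged into BValNo and erased.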
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), + UE = MRI->use_end(); + UI != UE; /* ++UI is below because of possible MI removal */) { + MachineOperand &UseMO = *UI; + ++UI; + if (UseMO.isUndef()) + continue; + MachineInstr *UseMI = UseMO.getParent(); + if (UseMI->isDebugValue()) { + // FIXME These don't have an instruction index. Not clear we have enough + // info to decide whether to do this replacement or not. For now do it. + UseMO.setReg(NewReg); + continue; + } + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + assert(US != IntA.end() && "Use must be live"); + if (US->valno != AValNo) + continue; + // Kill flags are no longer accurate. They are recomputed after RA. + UseMO.setIsKill(false); + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *TRI); + else + UseMO.setReg(NewReg); + if (UseMI == CopyMI) + continue; + if (!UseMI->isCopy()) + continue; + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + + // This copy will become a noop. If it's defining a new val#, merge it into + // BValNo. + SlotIndex DefIdx = UseIdx.getRegSlot(); + VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); + if (!DVNI) + continue; + DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); + assert(DVNI->def == DefIdx); + BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); + for (LiveInterval::SubRange &S : IntB.subranges()) { + VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); + if (!SubDVNI) + continue; + VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); + assert(SubBValNo->def == CopyIdx); + S.MergeValueNumberInto(SubDVNI, SubBValNo); + } + + ErasedInstrs.insert(UseMI); + LIS->RemoveMachineInstrFromMaps(UseMI); + UseMI->eraseFromParent(); + } + + // Extend BValNo by merging in IntA live segments of AValNo. Val# definition + // is updated. + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + if (IntB.hasSubRanges()) { + if (!IntA.hasSubRanges()) { + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); + IntA.createSubRangeFrom(Allocator, Mask, IntA); + } + SlotIndex AIdx = CopyIdx.getRegSlot(true); + for (LiveInterval::SubRange &SA : IntA.subranges()) { + VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); + assert(ASubValNo != nullptr); + + LaneBitmask AMask = SA.LaneMask; + for (LiveInterval::SubRange &SB : IntB.subranges()) { + LaneBitmask BMask = SB.LaneMask; + LaneBitmask Common = BMask & AMask; + if (Common == 0) + continue; + + DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask) + << " into " << PrintLaneMask(Common) << '\n'); + LaneBitmask BRest = BMask & ~AMask; + LiveInterval::SubRange *CommonRange; + if (BRest != 0) { + SB.LaneMask = BRest; + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest) + << '\n'); + // Duplicate SubRange for newly merged common stuff. + CommonRange = IntB.createSubRangeFrom(Allocator, Common, SB); + } else { + // We van reuse the L SubRange. 
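+          // (All of SB's lanes are common with SA here, so SB itself can
+          // serve as the common range instead of creating a new subrange.)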
+ SB.LaneMask = Common; + CommonRange = &SB; + } + LiveRange RangeCopy(SB, Allocator); + + VNInfo *BSubValNo = CommonRange->getVNInfoAt(CopyIdx); + assert(BSubValNo->def == CopyIdx); + BSubValNo->def = ASubValNo->def; + addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo); + AMask &= ~BMask; + } + if (AMask != 0) { + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n'); + LiveRange *NewRange = IntB.createSubRange(Allocator, AMask); + VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator); + addSegmentsWithValNo(*NewRange, BSubValNo, SA, ASubValNo); + } + } + } + + BValNo->def = AValNo->def; + addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); + DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); + + LIS->removeVRegDefAt(IntA, AValNo->def); + + DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); + ++numCommutes; + return true; +} + +/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just +/// defining a subregister. +static bool definesFullReg(const MachineInstr &MI, unsigned Reg) { + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && + "This code cannot handle physreg aliasing"); + for (const MachineOperand &Op : MI.operands()) { + if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) + continue; + // Return true if we define the full register or don't care about the value + // inside other subregisters. + if (Op.getSubReg() == 0 || Op.isUndef()) + return true; + } + return false; +} + +bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, + MachineInstr *CopyMI, + bool &IsDefCopy) { + IsDefCopy = false; + unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); + unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + LiveInterval &SrcInt = LIS->getInterval(SrcReg); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); + assert(ValNo && "CopyMI input register not live"); + if (ValNo->isPHIDef() || ValNo->isUnused()) + return false; + MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); + if (!DefMI) + return false; + if (DefMI->isCopyLike()) { + IsDefCopy = true; + return false; + } + if (!TII->isAsCheapAsAMove(DefMI)) + return false; + if (!TII->isTriviallyReMaterializable(DefMI, AA)) + return false; + if (!definesFullReg(*DefMI, SrcReg)) + return false; + bool SawStore = false; + if (!DefMI->isSafeToMove(AA, SawStore)) + return false; + const MCInstrDesc &MCID = DefMI->getDesc(); + if (MCID.getNumDefs() != 1) + return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + unsigned CopyDstReg = DstOperand.getReg(); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; + + // If both SrcIdx and DstIdx are set, correct rematerialization would widen + // the register substantially (beyond both source and dest size). This is bad + // for performance since it can cascade through a function, introducing many + // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers + // around after a few subreg copies). 
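+  // (In other words, when the source and destination would sit at two
+  // different subreg positions of a wider register, rematerialization is
+  // skipped entirely.)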
+ if (SrcIdx && DstIdx) + return false; + + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); + if (!DefMI->isImplicitDef()) { + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + unsigned NewDstReg = DstReg; + + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); + if (NewDstIdx) + NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); + + // Finally, make sure that the physical subregister that will be + // constructed later is permitted for the instruction. + if (!DefRC->contains(NewDstReg)) + return false; + } else { + // Theoretically, some stack frame reference could exist. Just make sure + // it hasn't actually happened. + assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "Only expect to deal with virtual or physical registers"); + } + } + + MachineBasicBlock *MBB = CopyMI->getParent(); + MachineBasicBlock::iterator MII = + std::next(MachineBasicBlock::iterator(CopyMI)); + TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); + MachineInstr *NewMI = std::prev(MII); + + // In a situation like the following: + // %vreg0:subreg = instr ; DefMI, subreg = DstIdx + // %vreg1 = copy %vreg0:subreg ; CopyMI, SrcIdx = 0 + // instead of widening %vreg1 to the register class of %vreg0 simply do: + // %vreg1 = instr + const TargetRegisterClass *NewRC = CP.getNewRC(); + if (DstIdx != 0) { + MachineOperand &DefMO = NewMI->getOperand(0); + if (DefMO.getSubReg() == DstIdx) { + assert(SrcIdx == 0 && CP.isFlipped() + && "Shouldn't have SrcIdx+DstIdx at this point"); + const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); + const TargetRegisterClass *CommonRC = + TRI->getCommonSubClass(DefRC, DstRC); + if (CommonRC != nullptr) { + NewRC = CommonRC; + DstIdx = 0; + DefMO.setSubReg(0); + } + } + } + + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ErasedInstrs.insert(CopyMI); + + // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). + // We need to remember these so we can add intervals once we insert + // NewMI into SlotIndexes. + SmallVector<unsigned, 4> NewMIImplDefs; + for (unsigned i = NewMI->getDesc().getNumOperands(), + e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (MO.isReg() && MO.isDef()) { + assert(MO.isImplicit() && MO.isDead() && + TargetRegisterInfo::isPhysicalRegister(MO.getReg())); + NewMIImplDefs.push_back(MO.getReg()); + } + } + + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + unsigned NewIdx = NewMI->getOperand(0).getSubReg(); + + if (DefRC != nullptr) { + if (NewIdx) + NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); + else + NewRC = TRI->getCommonSubClass(NewRC, DefRC); + assert(NewRC && "subreg chosen for remat incompatible with instruction"); + } + MRI->setRegClass(DstReg, NewRC); + + updateRegDefsUses(DstReg, DstReg, DstIdx); + NewMI->getOperand(0).setSubReg(NewIdx); + } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { + // The New instruction may be defining a sub-register of what's actually + // been asked for. If so it must implicitly define the whole thing. + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Only expect virtual or physical registers in remat"); + NewMI->getOperand(0).setIsDead(true); + NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/)); + // Record small dead def live-ranges for all the subregisters + // of the destination register. 
+ // Otherwise, variables that live through may miss some + // interferences, thus creating invalid allocation. + // E.g., i386 code: + // vreg1 = somedef ; vreg1 GR8 + // vreg2 = remat ; vreg2 GR32 + // CL = COPY vreg2.sub_8bit + // = somedef vreg1 ; vreg1 GR8 + // => + // vreg1 = somedef ; vreg1 GR8 + // ECX<def, dead> = remat ; CL<imp-def> + // = somedef vreg1 ; vreg1 GR8 + // vreg1 will see the inteferences with CL but not with CH since + // no live-ranges would have been created for ECX. + // Fix that! + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (MCRegUnitIterator Units(NewMI->getOperand(0).getReg(), TRI); + Units.isValid(); ++Units) + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); + } + + if (NewMI->getOperand(0).getSubReg()) + NewMI->getOperand(0).setIsUndef(); + + // CopyMI may have implicit operands, transfer them over to the newly + // rematerialized instruction. And update implicit def interval valnos. + for (unsigned i = CopyMI->getDesc().getNumOperands(), + e = CopyMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = CopyMI->getOperand(i); + if (MO.isReg()) { + assert(MO.isImplicit() && "No explicit operands after implict operands."); + // Discard VReg implicit defs. + if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { + NewMI->addOperand(MO); + } + } + } + + SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); + for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { + unsigned Reg = NewMIImplDefs[i]; + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); + } + + DEBUG(dbgs() << "Remat: " << *NewMI); + ++NumReMats; + + // The source interval can become smaller because we removed a use. + shrinkToUses(&SrcInt, &DeadDefs); + if (!DeadDefs.empty()) { + // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs + // to describe DstReg instead. + for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { + MachineInstr *UseMI = UseMO.getParent(); + if (UseMI->isDebugValue()) { + UseMO.setReg(DstReg); + DEBUG(dbgs() << "\t\tupdated: " << *UseMI); + } + } + eliminateDeadDefs(); + } + + return true; +} + +bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { + // ProcessImpicitDefs may leave some copies of <undef> values, it only removes + // local variables. When we have a copy like: + // + // %vreg1 = COPY %vreg2<undef> + // + // We delete the copy and remove the corresponding value number from %vreg1. + // Any uses of that value number are marked as <undef>. + + // Note that we do not query CoalescerPair here but redo isMoveInstr as the + // CoalescerPair may have a new register class with adjusted subreg indices + // at this point. + unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; + isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); + + SlotIndex Idx = LIS->getInstructionIndex(CopyMI); + const LiveInterval &SrcLI = LIS->getInterval(SrcReg); + // CopyMI is undef iff SrcReg is not live before the instruction. 
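+  // With subregister liveness, only the lanes actually copied (SrcSubIdx)
+  // need to be dead at Idx for the copy to be undef; the per-subrange check
+  // below tests exactly that before falling back to the main range.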
+ if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { + LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); + for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { + if ((SR.LaneMask & SrcMask) == 0) + continue; + if (SR.liveAt(Idx)) + return false; + } + } else if (SrcLI.liveAt(Idx)) + return false; + + DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); + + // Remove any DstReg segments starting at the instruction. + LiveInterval &DstLI = LIS->getInterval(DstReg); + SlotIndex RegIndex = Idx.getRegSlot(); + // Remove value or merge with previous one in case of a subregister def. + if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { + VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); + DstLI.MergeValueNumberInto(VNI, PrevVNI); + + // The affected subregister segments can be removed. + LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); + for (LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & DstMask) == 0) + continue; + + VNInfo *SVNI = SR.getVNInfoAt(RegIndex); + assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); + SR.removeValNo(SVNI); + } + DstLI.removeEmptySubRanges(); + } else + LIS->removeVRegDefAt(DstLI, RegIndex); + + // Mark uses as undef. + for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { + if (MO.isDef() /*|| MO.isUndef()*/) + continue; + const MachineInstr &MI = *MO.getParent(); + SlotIndex UseIdx = LIS->getInstructionIndex(&MI); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); + bool isLive; + if (UseMask != ~0u && DstLI.hasSubRanges()) { + isLive = false; + for (const LiveInterval::SubRange &SR : DstLI.subranges()) { + if ((SR.LaneMask & UseMask) == 0) + continue; + if (SR.liveAt(UseIdx)) { + isLive = true; + break; + } + } + } else + isLive = DstLI.liveAt(UseIdx); + if (isLive) + continue; + MO.setIsUndef(true); + DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); + } + return true; +} + +void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, + unsigned DstReg, + unsigned SubIdx) { + bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); + + SmallPtrSet<MachineInstr*, 8> Visited; + for (MachineRegisterInfo::reg_instr_iterator + I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); + I != E; ) { + MachineInstr *UseMI = &*(I++); + + // Each instruction can only be rewritten once because sub-register + // composition is not always idempotent. When SrcReg != DstReg, rewriting + // the UseMI operands removes them from the SrcReg use-def chain, but when + // SrcReg is DstReg we could encounter UseMI twice if it has multiple + // operands mentioning the virtual register. + if (SrcReg == DstReg && !Visited.insert(UseMI).second) + continue; + + SmallVector<unsigned,8> Ops; + bool Reads, Writes; + std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + + // If SrcReg wasn't read, it may still be the case that DstReg is live-in + // because SrcReg is a sub-register. + if (DstInt && !Reads && SubIdx) + Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + + // Adjust <undef> flags in case of sub-register joins. We don't want to + // turn a full def into a read-modify-write sub-register def and vice + // versa. 
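+      // E.g. (hypothetical vregs): a full def of %vreg2 that is rewritten to
+      // a def of %vreg1:sub0 must gain <read-undef> if the instruction does
+      // not also read the register, and must drop the flag if it does.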
+ if (SubIdx && MO.isDef()) + MO.setIsUndef(!Reads); + + // A subreg use of a partially undef (super) register may be a complete + // undef use now and then has to be marked that way. + if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { + if (!DstInt->hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); + DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); + } + LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubIdx); + bool IsUndef = true; + SlotIndex MIIdx = UseMI->isDebugValue() + ? LIS->getSlotIndexes()->getIndexBefore(UseMI) + : LIS->getInstructionIndex(UseMI); + SlotIndex UseIdx = MIIdx.getRegSlot(true); + for (LiveInterval::SubRange &S : DstInt->subranges()) { + if ((S.LaneMask & Mask) == 0) + continue; + if (S.liveAt(UseIdx)) { + IsUndef = false; + break; + } + } + if (IsUndef) { + MO.setIsUndef(true); + // We found out some subregister use is actually reading an undefined + // value. In some cases the whole vreg has become undefined at this + // point so we have to potentially shrink the main range if the + // use was ending a live segment there. + LiveQueryResult Q = DstInt->Query(MIIdx); + if (Q.valueOut() == nullptr) + ShrinkMainRange = true; + } + } + + if (DstIsPhys) + MO.substPhysReg(DstReg, *TRI); + else + MO.substVirtReg(DstReg, SubIdx, *TRI); + } + + DEBUG({ + dbgs() << "\t\tupdated: "; + if (!UseMI->isDebugValue()) + dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; + dbgs() << *UseMI; + }); + } +} + +bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { + // Always join simple intervals that are defined by a single copy from a + // reserved register. This doesn't increase register pressure, so it is + // always beneficial. + if (!MRI->isReserved(CP.getDstReg())) { + DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); + return false; + } + + LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); + if (JoinVInt.containsOneValue()) + return true; + + DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); + return false; +} + +bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { + + Again = false; + DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); + + CoalescerPair CP(*TRI); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + + if (CP.getNewRC()) { + auto SrcRC = MRI->getRegClass(CP.getSrcReg()); + auto DstRC = MRI->getRegClass(CP.getDstReg()); + unsigned SrcIdx = CP.getSrcIdx(); + unsigned DstIdx = CP.getDstIdx(); + if (CP.isFlipped()) { + std::swap(SrcIdx, DstIdx); + std::swap(SrcRC, DstRC); + } + if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, + CP.getNewRC())) { + DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); + return false; + } + } + + // Dead code elimination. This really should be handled by MachineDCE, but + // sometimes dead copies slip through, and we can't generate invalid live + // ranges. + if (!CP.isPhys() && CopyMI->allDefsAreDead()) { + DEBUG(dbgs() << "\tCopy is dead.\n"); + DeadDefs.push_back(CopyMI); + eliminateDeadDefs(); + return true; + } + + // Eliminate undefs. + if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return false; // Not coalescable. + } + + // Coalesced copies are normally removed immediately, but transformations + // like removeCopyByCommutingDef() can inadvertently create identity copies. 
+ // When that happens, just join the values and remove the copy. + if (CP.getSrcReg() == CP.getDstReg()) { + LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); + const SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + LiveQueryResult LRQ = LI.Query(CopyIdx); + if (VNInfo *DefVNI = LRQ.valueDefined()) { + VNInfo *ReadVNI = LRQ.valueIn(); + assert(ReadVNI && "No value before copy and no <undef> flag."); + assert(ReadVNI != DefVNI && "Cannot read and define the same value."); + LI.MergeValueNumberInto(DefVNI, ReadVNI); + + // Process subregister liveranges. + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveQueryResult SLRQ = S.Query(CopyIdx); + if (VNInfo *SDefVNI = SLRQ.valueDefined()) { + VNInfo *SReadVNI = SLRQ.valueIn(); + S.MergeValueNumberInto(SDefVNI, SReadVNI); + } + } + DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); + } + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + return true; + } + + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) + << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) + << '\n'); + if (!canJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) + return true; + if (IsDefCopy) + Again = true; // May be possible to coalesce later. + return false; + } + } else { + // When possible, let DstReg be the larger interval. + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) + CP.flip(); + + DEBUG({ + dbgs() << "\tConsidering merging to " + << TRI->getRegClassName(CP.getNewRC()) << " with "; + if (CP.getDstIdx() && CP.getSrcIdx()) + dbgs() << PrintReg(CP.getDstReg()) << " in " + << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " + << PrintReg(CP.getSrcReg()) << " in " + << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; + else + dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " + << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; + }); + } + + ShrinkMask = 0; + ShrinkMainRange = false; + + // Okay, attempt to join these two intervals. On failure, this returns false. + // Otherwise, if one of the intervals being joined is a physreg, this method + // always canonicalizes DstInt to be it. The output "SrcInt" will not have + // been modified, so we can use this information below to update aliases. + if (!joinIntervals(CP)) { + // Coalescing failed. + + // If definition of source is defined by trivial computation, try + // rematerializing it. + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) + return true; + + // If we can eliminate the copy without merging the live segments, do so + // now. + if (!CP.isPartial() && !CP.isPhys()) { + if (adjustCopiesBackFrom(CP, CopyMI) || + removeCopyByCommutingDef(CP, CopyMI)) { + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } + } + + // Otherwise, we are unable to join the intervals. + DEBUG(dbgs() << "\tInterference!\n"); + Again = true; // May be possible to coalesce later. + return false; + } + + // Coalescing to a virtual register that is of a sub-register class of the + // other. Make sure the resulting register is set to the right register class. 
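+  // (For instance, joining an x86 GR32 vreg with a GR32_ABCD vreg leaves the
+  // merged register constrained to the common subclass GR32_ABCD.)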
+ if (CP.isCrossClass()) { + ++numCrossRCs; + MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); + } + + // Removing sub-register copies can ease the register class constraints. + // Make sure we attempt to inflate the register class of DstReg. + if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) + InflateRegs.push_back(CP.getDstReg()); + + // CopyMI has been erased by joinIntervals at this point. Remove it from + // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back + // to the work list. This keeps ErasedInstrs from growing needlessly. + ErasedInstrs.erase(CopyMI); + + // Rewrite all SrcReg operands to DstReg. + // Also update DstReg operands to include DstIdx if it is set. + if (CP.getDstIdx()) + updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); + updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); + + // Shrink subregister ranges if necessary. + if (ShrinkMask != 0) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + for (LiveInterval::SubRange &S : LI.subranges()) { + if ((S.LaneMask & ShrinkMask) == 0) + continue; + DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) + << ")\n"); + LIS->shrinkToUses(S, LI.reg); + } + LI.removeEmptySubRanges(); + } + if (ShrinkMainRange) { + LiveInterval &LI = LIS->getInterval(CP.getDstReg()); + shrinkToUses(&LI); + } + + // SrcReg is guaranteed to be the register whose live interval that is + // being merged. + LIS->removeInterval(CP.getSrcReg()); + + // Update regalloc hint. + TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); + + DEBUG({ + dbgs() << "\tSuccess: " << PrintReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) + << " -> " << PrintReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; + dbgs() << "\tResult = "; + if (CP.isPhys()) + dbgs() << PrintReg(CP.getDstReg(), TRI); + else + dbgs() << LIS->getInterval(CP.getDstReg()); + dbgs() << '\n'; + }); + + ++numJoins; + return true; +} + +bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { + unsigned DstReg = CP.getDstReg(); + assert(CP.isPhys() && "Must be a physreg copy"); + assert(MRI->isReserved(DstReg) && "Not a reserved register"); + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); + + assert(RHS.containsOneValue() && "Invalid join with reserved register"); + + // Optimization for reserved registers like ESP. We can only merge with a + // reserved physreg if RHS has a single value that is a copy of DstReg. + // The live range of the reserved register will look like a set of dead defs + // - we don't properly track the live range of reserved registers. + + // Deny any overlapping intervals. This depends on all the reserved + // register live ranges to look like dead defs. + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) + if (RHS.overlaps(LIS->getRegUnit(*UI))) { + DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); + return false; + } + + // Skip any value computations, we are not adding new values to the + // reserved register. Also skip merging the live ranges, the reserved + // register live range doesn't need to be accurate as long as all the + // defs are there. + + // Delete the identity copy. 
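+  // Two cases below: if the pair is flipped, the vreg is defined by the copy
+  // itself; otherwise the copy must be the only non-debug use of the vreg,
+  // and the physreg def is effectively moved up to the vreg's definition.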
+ MachineInstr *CopyMI; + if (CP.isFlipped()) { + CopyMI = MRI->getVRegDef(RHS.reg); + } else { + if (!MRI->hasOneNonDBGUse(RHS.reg)) { + DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + return false; + } + + MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); + CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intent to move up the def of + // the physical register, we also need to check for interfering uses. + SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(DstReg, TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + return false; + } + + // We must also check for clobbers caused by regmasks. + for (const auto &MO : MI->operands()) { + if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) { + DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI); + return false; + } + } + } + + // We're going to remove the copy which defines a physical reserved + // register, so remove its valno, etc. + DEBUG(dbgs() << "\t\tRemoving phys reg def of " << DstReg << " at " + << CopyRegIdx << "\n"); + + LIS->removePhysRegDefAt(DstReg, CopyRegIdx); + // Create a new dead def at the new def location. + for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { + LiveRange &LR = LIS->getRegUnit(*UI); + LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); + } + } + + LIS->RemoveMachineInstrFromMaps(CopyMI); + CopyMI->eraseFromParent(); + + // We don't track kills for reserved registers. + MRI->clearKillFlags(CP.getSrcReg()); + + return true; +} + +//===----------------------------------------------------------------------===// +// Interference checking and interval joining +//===----------------------------------------------------------------------===// +// +// In the easiest case, the two live ranges being joined are disjoint, and +// there is no interference to consider. It is quite common, though, to have +// overlapping live ranges, and we need to check if the interference can be +// resolved. +// +// The live range of a single SSA value forms a sub-tree of the dominator tree. +// This means that two SSA values overlap if and only if the def of one value +// is contained in the live range of the other value. As a special case, the +// overlapping values can be defined at the same index. +// +// The interference from an overlapping def can be resolved in these cases: +// +// 1. Coalescable copies. The value is defined by a copy that would become an +// identity copy after joining SrcReg and DstReg. The copy instruction will +// be removed, and the value will be merged with the source value. +// +// There can be several copies back and forth, causing many values to be +// merged into one. We compute a list of ultimate values in the joined live +// range as well as a mappings from the old value numbers. +// +// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI +// predecessors have a live out value. It doesn't cause real interference, +// and can be merged into the value it overlaps. Like a coalescable copy, it +// can be erased after joining. +// +// 3. Copy of external value. 
The overlapping def may be a copy of a value that +// is already in the other register. This is like a coalescable copy, but +// the live range of the source register must be trimmed after erasing the +// copy instruction: +// +// %src = COPY %ext +// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. +// +// 4. Clobbering undefined lanes. Vector registers are sometimes built by +// defining one lane at a time: +// +// %dst:ssub0<def,read-undef> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// The live range of %src overlaps the %dst value defined by FOO, but +// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane +// which was undef anyway. +// +// The value mapping is more complicated in this case. The final live range +// will have different value numbers for both FOO and BAR, but there is no +// simple mapping from old to new values. It may even be necessary to add +// new PHI values. +// +// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that +// is live, but never read. This can happen because we don't compute +// individual live ranges per lane. +// +// %dst<def> = FOO +// %src = BAR +// %dst:ssub1<def> = COPY %src +// +// This kind of interference is only resolved locally. If the clobbered +// lane value escapes the block, the join is aborted. + +namespace { +/// Track information about values in a single virtual register about to be +/// joined. Objects of this class are always created in pairs - one for each +/// side of the CoalescerPair (or one for each lane of a side of the coalescer +/// pair) +class JoinVals { + /// Live range we work on. + LiveRange &LR; + /// (Main) register we work on. + const unsigned Reg; + + /// Reg (and therefore the values in this liverange) will end up as + /// subregister SubIdx in the coalesced register. Either CP.DstIdx or + /// CP.SrcIdx. + const unsigned SubIdx; + /// The LaneMask that this liverange will occupy the coalesced register. May + /// be smaller than the lanemask produced by SubIdx when merging subranges. + const LaneBitmask LaneMask; + + /// This is true when joining sub register ranges, false when joining main + /// ranges. + const bool SubRangeJoin; + /// Whether the current LiveInterval tracks subregister liveness. + const bool TrackSubRegLiveness; + + /// Values that will be present in the final live range. + SmallVectorImpl<VNInfo*> &NewVNInfo; + + const CoalescerPair &CP; + LiveIntervals *LIS; + SlotIndexes *Indexes; + const TargetRegisterInfo *TRI; + + /// Value number assignments. Maps value numbers in LI to entries in + /// NewVNInfo. This is suitable for passing to LiveInterval::join(). + SmallVector<int, 8> Assignments; + + /// Conflict resolution for overlapping values. + enum ConflictResolution { + /// No overlap, simply keep this value. + CR_Keep, + + /// Merge this value into OtherVNI and erase the defining instruction. + /// Used for IMPLICIT_DEF, coalescable copies, and copies from external + /// values. + CR_Erase, + + /// Merge this value into OtherVNI but keep the defining instruction. + /// This is for the special case where OtherVNI is defined by the same + /// instruction. + CR_Merge, + + /// Keep this value, and have it replace OtherVNI where possible. This + /// complicates value mapping since OtherVNI maps to two different values + /// before and after this def. + /// Used when clobbering undefined or dead lanes. + CR_Replace, + + /// Unresolved conflict. Visit later when all values have been mapped. 
+ CR_Unresolved, + + /// Unresolvable conflict. Abort the join. + CR_Impossible + }; + + /// Per-value info for LI. The lane bit masks are all relative to the final + /// joined register, so they can be compared directly between SrcReg and + /// DstReg. + struct Val { + ConflictResolution Resolution; + + /// Lanes written by this def, 0 for unanalyzed values. + LaneBitmask WriteLanes; + + /// Lanes with defined values in this register. Other lanes are undef and + /// safe to clobber. + LaneBitmask ValidLanes; + + /// Value in LI being redefined by this def. + VNInfo *RedefVNI; + + /// Value in the other live range that overlaps this def, if any. + VNInfo *OtherVNI; + + /// Is this value an IMPLICIT_DEF that can be erased? + /// + /// IMPLICIT_DEF values should only exist at the end of a basic block that + /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be + /// safely erased if they are overlapping a live value in the other live + /// interval. + /// + /// Weird control flow graphs and incomplete PHI handling in + /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + /// longer live ranges. Such IMPLICIT_DEF values should be treated like + /// normal values. + bool ErasableImplicitDef; + + /// True when the live range of this value will be pruned because of an + /// overlapping CR_Replace value in the other live range. + bool Pruned; + + /// True once Pruned above has been computed. + bool PrunedComputed; + + Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), + RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false), + Pruned(false), PrunedComputed(false) {} + + bool isAnalyzed() const { return WriteLanes != 0; } + }; + + /// One entry per value number in LI. + SmallVector<Val, 8> Vals; + + /// Compute the bitmask of lanes actually written by DefMI. + /// Set Redef if there are any partial register definitions that depend on the + /// previous value of the register. + LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; + + /// Find the ultimate value that VNI was copied from. + std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; + + bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; + + /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. + /// Return a conflict resolution when possible, but leave the hard cases as + /// CR_Unresolved. + /// Recursively calls computeAssignment() on this and Other, guaranteeing that + /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. + /// The recursion always goes upwards in the dominator tree, making loops + /// impossible. + ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); + + /// Compute the value assignment for ValNo in RI. + /// This may be called recursively by analyzeValue(), but never for a ValNo on + /// the stack. + void computeAssignment(unsigned ValNo, JoinVals &Other); + + /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute + /// the extent of the tainted lanes in the block. + /// + /// Multiple values in Other.LR can be affected since partial redefinitions + /// can preserve previously tainted lanes. 
+ /// + /// 1 %dst = VLOAD <-- Define all lanes in %dst + /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 + /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 + /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read + /// + /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) + /// entry to TaintedVals. + /// + /// Returns false if the tainted lanes extend beyond the basic block. + bool taintExtent(unsigned, LaneBitmask, JoinVals&, + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> >&); + + /// Return true if MI uses any of the given Lanes from Reg. + /// This does not include partial redefinitions of Reg. + bool usesLanes(const MachineInstr *MI, unsigned, unsigned, LaneBitmask) const; + + /// Determine if ValNo is a copy of a value number in LR or Other.LR that will + /// be pruned: + /// + /// %dst = COPY %src + /// %src = COPY %dst <-- This value to be pruned. + /// %dst = COPY %src <-- This value is a copy of a pruned value. + bool isPrunedValue(unsigned ValNo, JoinVals &Other); + +public: + JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, + SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, + LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, + bool TrackSubRegLiveness) + : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), + SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), + NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), + TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) + {} + + /// Analyze defs in LR and compute a value mapping in NewVNInfo. + /// Returns false if any conflicts were impossible to resolve. + bool mapValues(JoinVals &Other); + + /// Try to resolve conflicts that require all values to be mapped. + /// Returns false if any conflicts were impossible to resolve. + bool resolveConflicts(JoinVals &Other); + + /// Prune the live range of values in Other.LR where they would conflict with + /// CR_Replace values in LR. Collect end points for restoring the live range + /// after joining. + void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints, + bool changeInstrs); + + /// Removes subranges starting at copies that get removed. This sometimes + /// happens when undefined subranges are copied around. These ranges contain + /// no useful information and can be removed. + void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); + + /// Erase any machine instructions that have been coalesced away. + /// Add erased instructions to ErasedInstrs. + /// Add foreign virtual registers to ShrinkRegs if their live range ended at + /// the erased instrs. + void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs); + + /// Remove liverange defs at places where implicit defs will be removed. + void removeImplicitDefs(); + + /// Get the value assignments suitable for passing to LiveInterval::join. 
+ const int *getAssignments() const { return Assignments.data(); } +}; +} // end anonymous namespace + +LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) + const { + LaneBitmask L = 0; + for (const MachineOperand &MO : DefMI->operands()) { + if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) + continue; + L |= TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); + if (MO.readsReg()) + Redef = true; + } + return L; +} + +std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( + const VNInfo *VNI) const { + unsigned Reg = this->Reg; + + while (!VNI->isPHIDef()) { + SlotIndex Def = VNI->def; + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No defining instruction"); + if (!MI->isFullCopy()) + return std::make_pair(VNI, Reg); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + return std::make_pair(VNI, Reg); + + const LiveInterval &LI = LIS->getInterval(SrcReg); + const VNInfo *ValueIn; + // No subrange involved. + if (!SubRangeJoin || !LI.hasSubRanges()) { + LiveQueryResult LRQ = LI.Query(Def); + ValueIn = LRQ.valueIn(); + } else { + // Query subranges. Pick the first matching one. + ValueIn = nullptr; + for (const LiveInterval::SubRange &S : LI.subranges()) { + // Transform lanemask to a mask in the joined live interval. + LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); + if ((SMask & LaneMask) == 0) + continue; + LiveQueryResult LRQ = S.Query(Def); + ValueIn = LRQ.valueIn(); + break; + } + } + if (ValueIn == nullptr) + break; + VNI = ValueIn; + Reg = SrcReg; + } + return std::make_pair(VNI, Reg); +} + +bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, + const JoinVals &Other) const { + const VNInfo *Orig0; + unsigned Reg0; + std::tie(Orig0, Reg0) = followCopyChain(Value0); + if (Orig0 == Value1) + return true; + + const VNInfo *Orig1; + unsigned Reg1; + std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); + + // The values are equal if they are defined at the same place and use the + // same register. Note that we cannot compare VNInfos directly as some of + // them might be from a copy created in mergeSubRangeInto() while the other + // is from the original LiveInterval. + return Orig0->def == Orig1->def && Reg0 == Reg1; +} + +JoinVals::ConflictResolution +JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + assert(!V.isAnalyzed() && "Value has already been analyzed!"); + VNInfo *VNI = LR.getValNumInfo(ValNo); + if (VNI->isUnused()) { + V.WriteLanes = ~0u; + return CR_Keep; + } + + // Get the instruction defining this value, compute the lanes written. + const MachineInstr *DefMI = nullptr; + if (VNI->isPHIDef()) { + // Conservatively assume that all lanes in a PHI are valid. + LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx); + V.ValidLanes = V.WriteLanes = Lanes; + } else { + DefMI = Indexes->getInstructionFromIndex(VNI->def); + assert(DefMI != nullptr); + if (SubRangeJoin) { + // We don't care about the lanes when joining subregister ranges. + V.WriteLanes = V.ValidLanes = 1; + if (DefMI->isImplicitDef()) { + V.ValidLanes = 0; + V.ErasableImplicitDef = true; + } + } else { + bool Redef = false; + V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); + + // If this is a read-modify-write instruction, there may be more valid + // lanes than the ones written by this instruction. + // This only covers partial redef operands. 
DefMI may have normal use + // operands reading the register. They don't contribute valid lanes. + // + // This adds ssub1 to the set of valid lanes in %src: + // + // %src:ssub1<def> = FOO + // + // This leaves only ssub1 valid, making any other lanes undef: + // + // %src:ssub1<def,read-undef> = FOO %src:ssub2 + // + // The <read-undef> flag on the def operand means that old lane values are + // not important. + if (Redef) { + V.RedefVNI = LR.Query(VNI->def).valueIn(); + assert((TrackSubRegLiveness || V.RedefVNI) && + "Instruction is reading nonexistent value"); + if (V.RedefVNI != nullptr) { + computeAssignment(V.RedefVNI->id, Other); + V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; + } + } + + // An IMPLICIT_DEF writes undef values. + if (DefMI->isImplicitDef()) { + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; + V.ValidLanes &= ~V.WriteLanes; + } + } + } + + // Find the value in Other that overlaps VNI->def, if any. + LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def); + + // It is possible that both values are defined by the same instruction, or + // the values are PHIs defined in the same block. When that happens, the two + // values should be merged into one, but not into any preceding value. + // The first value defined or visited gets CR_Keep, the other gets CR_Merge. + if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { + assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); + + // One value stays, the other is merged. Keep the earlier one, or the first + // one we see. + if (OtherVNI->def < VNI->def) + Other.computeAssignment(OtherVNI->id, *this); + else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { + // This is an early-clobber def overlapping a live-in value in the other + // register. Not mergeable. + V.OtherVNI = OtherLRQ.valueIn(); + return CR_Impossible; + } + V.OtherVNI = OtherVNI; + Val &OtherV = Other.Vals[OtherVNI->id]; + // Keep this value, check for conflicts when analyzing OtherVNI. + if (!OtherV.isAnalyzed()) + return CR_Keep; + // Both sides have been analyzed now. + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Merge; + if (V.ValidLanes & OtherV.ValidLanes) + // Overlapping lanes can't be resolved. + return CR_Impossible; + else + return CR_Merge; + } + + // No simultaneous def. Is Other live at the def? + V.OtherVNI = OtherLRQ.valueIn(); + if (!V.OtherVNI) + // No overlap, no conflict. + return CR_Keep; + + assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); + + // We have overlapping values, or possibly a kill of Other. + // Recursively compute assignments up the dominator tree. + Other.computeAssignment(V.OtherVNI->id, *this); + Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. + // This shouldn't normally happen, but ProcessImplicitDefs can leave such + // IMPLICIT_DEF instructions behind, and there is nothing wrong with it + // technically. + // + // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try + // to erase the IMPLICIT_DEF instruction. 
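+  // (That situation is detected below: the IMPLICIT_DEF value is still live
+  // in DefMI's block even though it was defined in a different block.)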
+ if (OtherV.ErasableImplicitDef && DefMI && + DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { + DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def + << " extends into BB#" << DefMI->getParent()->getNumber() + << ", keeping it.\n"); + OtherV.ErasableImplicitDef = false; + } + + // Allow overlapping PHI values. Any real interference would show up in a + // predecessor, the PHI itself can't introduce any conflicts. + if (VNI->isPHIDef()) + return CR_Replace; + + // Check for simple erasable conflicts. + if (DefMI->isImplicitDef()) { + // We need the def for the subregister if there is nothing else live at the + // subrange at this point. + if (TrackSubRegLiveness + && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0) + return CR_Replace; + return CR_Erase; + } + + // Include the non-conflict where DefMI is a coalescable copy that kills + // OtherVNI. We still want the copy erased and value numbers merged. + if (CP.isCoalescable(DefMI)) { + // Some of the lanes copied from OtherVNI may be undef, making them undef + // here too. + V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; + return CR_Erase; + } + + // This may not be a real conflict if DefMI simply kills Other and defines + // VNI. + if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) + return CR_Keep; + + // Handle the case where VNI and OtherVNI can be proven to be identical: + // + // %other = COPY %ext + // %this = COPY %ext <-- Erase this copy + // + if (DefMI->isFullCopy() && !CP.isPartial() + && valuesIdentical(VNI, V.OtherVNI, Other)) + return CR_Erase; + + // If the lanes written by this instruction were all undef in OtherVNI, it is + // still safe to join the live ranges. This can't be done with a simple value + // mapping, though - OtherVNI will map to multiple values: + // + // 1 %dst:ssub0 = FOO <-- OtherVNI + // 2 %src = BAR <-- VNI + // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. + // 4 BAZ %dst<kill> + // 5 QUUX %src<kill> + // + // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace + // handles this complex value mapping. + if ((V.WriteLanes & OtherV.ValidLanes) == 0) + return CR_Replace; + + // If the other live range is killed by DefMI and the live ranges are still + // overlapping, it must be because we're looking at an early clobber def: + // + // %dst<def,early-clobber> = ASM %src<kill> + // + // In this case, it is illegal to merge the two live ranges since the early + // clobber def would clobber %src before it was read. + if (OtherLRQ.isKill()) { + // This case where the def doesn't overlap the kill is handled above. + assert(VNI->def.isEarlyClobber() && + "Only early clobber defs can overlap a kill"); + return CR_Impossible; + } + + // VNI is clobbering live lanes in OtherVNI, but there is still the + // possibility that no instructions actually read the clobbered lanes. + // If we're clobbering all the lanes in OtherVNI, at least one must be read. + // Otherwise Other.RI wouldn't be live here. + if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) + return CR_Impossible; + + // We need to verify that no instructions are reading the clobbered lanes. To + // save compile time, we'll only check that locally. Don't allow the tainted + // value to escape the basic block. 
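+  //
+  // The test below is the cheap half of that check: if OtherLRQ.endPoint()
+  // is still before getMBBEndIdx(MBB), the tainted part of Other's range is
+  // confined to this block and the per-instruction scan can be deferred to
+  // resolveConflicts(); otherwise give up now.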
+ MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) + return CR_Impossible; + + // There are still some things that could go wrong besides clobbered lanes + // being read, for example OtherVNI may be only partially redefined in MBB, + // and some clobbered lanes could escape the block. Save this analysis for + // resolveConflicts() when all values have been mapped. We need to know + // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute + // that now - the recursive analyzeValue() calls must go upwards in the + // dominator tree. + return CR_Unresolved; +} + +void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.isAnalyzed()) { + // Recursion should always move up the dominator tree, so ValNo is not + // supposed to reappear before it has been assigned. + assert(Assignments[ValNo] != -1 && "Bad recursion?"); + return; + } + switch ((V.Resolution = analyzeValue(ValNo, Other))) { + case CR_Erase: + case CR_Merge: + // Merge this ValNo into OtherVNI. + assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); + assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); + Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; + DEBUG(dbgs() << "\t\tmerge " << PrintReg(Reg) << ':' << ValNo << '@' + << LR.getValNumInfo(ValNo)->def << " into " + << PrintReg(Other.Reg) << ':' << V.OtherVNI->id << '@' + << V.OtherVNI->def << " --> @" + << NewVNInfo[Assignments[ValNo]]->def << '\n'); + break; + case CR_Replace: + case CR_Unresolved: { + // The other value is going to be pruned if this join is successful. + assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); + Val &OtherV = Other.Vals[V.OtherVNI->id]; + // We cannot erase an IMPLICIT_DEF if we don't have valid values for all + // its lanes. + if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness) + OtherV.ErasableImplicitDef = false; + OtherV.Pruned = true; + } + // Fall through. + default: + // This value number needs to go in the final joined live range. + Assignments[ValNo] = NewVNInfo.size(); + NewVNInfo.push_back(LR.getValNumInfo(ValNo)); + break; + } +} + +bool JoinVals::mapValues(JoinVals &Other) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + computeAssignment(i, Other); + if (Vals[i].Resolution == CR_Impossible) { + DEBUG(dbgs() << "\t\tinterference at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); + return false; + } + } + return true; +} + +bool JoinVals:: +taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, + SmallVectorImpl<std::pair<SlotIndex, LaneBitmask> > &TaintExtent) { + VNInfo *VNI = LR.getValNumInfo(ValNo); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); + + // Scan Other.LR from VNI.def to MBBEnd. + LiveInterval::iterator OtherI = Other.LR.find(VNI->def); + assert(OtherI != Other.LR.end() && "No conflict?"); + do { + // OtherI is pointing to a tainted value. Abort the join if the tainted + // lanes escape the block. + SlotIndex End = OtherI->end; + if (End >= MBBEnd) { + DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.Reg) << ':' + << OtherI->valno->id << '@' << OtherI->start << '\n'); + return false; + } + DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.Reg) << ':' + << OtherI->valno->id << '@' << OtherI->start + << " to " << End << '\n'); + // A dead def is not a problem. 
+ if (End.isDead()) + break; + TaintExtent.push_back(std::make_pair(End, TaintedLanes)); + + // Check for another def in the MBB. + if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd) + break; + + // Lanes written by the new def are no longer tainted. + const Val &OV = Other.Vals[OtherI->valno->id]; + TaintedLanes &= ~OV.WriteLanes; + if (!OV.RedefVNI) + break; + } while (TaintedLanes); + return true; +} + +bool JoinVals::usesLanes(const MachineInstr *MI, unsigned Reg, unsigned SubIdx, + LaneBitmask Lanes) const { + if (MI->isDebugValue()) + return false; + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) + continue; + if (!MO.readsReg()) + continue; + if (Lanes & TRI->getSubRegIndexLaneMask( + TRI->composeSubRegIndices(SubIdx, MO.getSubReg()))) + return true; + } + return false; +} + +bool JoinVals::resolveConflicts(JoinVals &Other) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); + if (V.Resolution != CR_Unresolved) + continue; + DEBUG(dbgs() << "\t\tconflict at " << PrintReg(Reg) << ':' << i + << '@' << LR.getValNumInfo(i)->def << '\n'); + if (SubRangeJoin) + return false; + + ++NumLaneConflicts; + assert(V.OtherVNI && "Inconsistent conflict resolution."); + VNInfo *VNI = LR.getValNumInfo(i); + const Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the + // join, those lanes will be tainted with a wrong value. Get the extent of + // the tainted lanes. + LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; + SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent; + if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) + // Tainted lanes would extend beyond the basic block. + return false; + + assert(!TaintExtent.empty() && "There should be at least one conflict."); + + // Now look at the instructions from VNI->def to TaintExtent (inclusive). + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); + MachineBasicBlock::iterator MI = MBB->begin(); + if (!VNI->isPHIDef()) { + MI = Indexes->getInstructionFromIndex(VNI->def); + // No need to check the instruction defining VNI for reads. + ++MI; + } + assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && + "Interference ends on VNI->def. Should have been handled earlier"); + MachineInstr *LastMI = + Indexes->getInstructionFromIndex(TaintExtent.front().first); + assert(LastMI && "Range must end at a proper instruction"); + unsigned TaintNum = 0; + for(;;) { + assert(MI != MBB->end() && "Bad LastMI"); + if (usesLanes(MI, Other.Reg, Other.SubIdx, TaintedLanes)) { + DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); + return false; + } + // LastMI is the last instruction to use the current value. + if (&*MI == LastMI) { + if (++TaintNum == TaintExtent.size()) + break; + LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); + assert(LastMI && "Range must end at a proper instruction"); + TaintedLanes = TaintExtent[TaintNum].second; + } + ++MI; + } + + // The tainted lanes are unused. 
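+    // No instruction between VNI->def and the end of the tainted extent
+    // reads the clobbered lanes, so the conflict can be downgraded to
+    // CR_Replace; the overlapping part of the other range is pruned later
+    // in pruneValues().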
+ V.Resolution = CR_Replace; + ++NumLaneResolves; + } + return true; +} + +bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { + Val &V = Vals[ValNo]; + if (V.Pruned || V.PrunedComputed) + return V.Pruned; + + if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) + return V.Pruned; + + // Follow copies up the dominator tree and check if any intermediate value + // has been pruned. + V.PrunedComputed = true; + V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); + return V.Pruned; +} + +void JoinVals::pruneValues(JoinVals &Other, + SmallVectorImpl<SlotIndex> &EndPoints, + bool changeInstrs) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + SlotIndex Def = LR.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: + break; + case CR_Replace: { + // This value takes precedence over the value in Other.LR. + LIS->pruneValue(Other.LR, Def, &EndPoints); + // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF + // instructions are only inserted to provide a live-out value for PHI + // predecessors, so the instruction should simply go away once its value + // has been replaced. + Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; + bool EraseImpDef = OtherV.ErasableImplicitDef && + OtherV.Resolution == CR_Keep; + if (!Def.isBlock()) { + if (changeInstrs) { + // Remove <def,read-undef> flags. This def is now a partial redef. + // Also remove <def,dead> flags since the joined live range will + // continue past this instruction. + for (MachineOperand &MO : + Indexes->getInstructionFromIndex(Def)->operands()) { + if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { + MO.setIsUndef(EraseImpDef); + MO.setIsDead(false); + } + } + } + // This value will reach instructions below, but we need to make sure + // the live range also reaches the instruction at Def. + if (!EraseImpDef) + EndPoints.push_back(Def); + } + DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.Reg) << " at " << Def + << ": " << Other.LR << '\n'); + break; + } + case CR_Erase: + case CR_Merge: + if (isPrunedValue(i, Other)) { + // This value is ultimately a copy of a pruned value in LR or Other.LR. + // We can no longer trust the value mapping computed by + // computeAssignment(), the value that was originally copied could have + // been replaced. + LIS->pruneValue(LR, Def, &EndPoints); + DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(Reg) << " at " + << Def << ": " << LR << '\n'); + } + break; + case CR_Unresolved: + case CR_Impossible: + llvm_unreachable("Unresolved conflicts"); + } + } +} + +void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) +{ + // Look for values being erased. + bool DidPrune = false; + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + if (Vals[i].Resolution != CR_Erase) + continue; + + // Check subranges at the point where the copy will be removed. + SlotIndex Def = LR.getValNumInfo(i)->def; + for (LiveInterval::SubRange &S : LI.subranges()) { + LiveQueryResult Q = S.Query(Def); + + // If a subrange starts at the copy then an undefined value has been + // copied and we must remove that subrange value as well. + VNInfo *ValueOut = Q.valueOutOrDead(); + if (ValueOut != nullptr && Q.valueIn() == nullptr) { + DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); + LIS->pruneValue(S, Def, nullptr); + DidPrune = true; + // Mark value number as unused. 
+ ValueOut->markUnused(); + continue; + } + // If a subrange ends at the copy, then a value was copied but only + // partially used later. Shrink the subregister range appropriately. + if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { + DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) + << " at " << Def << "\n"); + ShrinkMask |= S.LaneMask; + } + } + } + if (DidPrune) + LI.removeEmptySubRanges(); +} + +void JoinVals::removeImplicitDefs() { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + Val &V = Vals[i]; + if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) + continue; + + VNInfo *VNI = LR.getValNumInfo(i); + VNI->markUnused(); + LR.removeValNo(VNI); + } +} + +void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, + SmallVectorImpl<unsigned> &ShrinkRegs) { + for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { + // Get the def location before markUnused() below invalidates it. + SlotIndex Def = LR.getValNumInfo(i)->def; + switch (Vals[i].Resolution) { + case CR_Keep: { + // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any + // longer. The IMPLICIT_DEF instructions are only inserted by + // PHIElimination to guarantee that all PHI predecessors have a value. + if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) + break; + // Remove value number i from LR. + VNInfo *VNI = LR.getValNumInfo(i); + LR.removeValNo(VNI); + // Note that this VNInfo is reused and still referenced in NewVNInfo, + // make it appear like an unused value number. + VNI->markUnused(); + DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'); + // FALL THROUGH. + } + + case CR_Erase: { + MachineInstr *MI = Indexes->getInstructionFromIndex(Def); + assert(MI && "No instruction to erase"); + if (MI->isCopy()) { + unsigned Reg = MI->getOperand(1).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg) && + Reg != CP.getSrcReg() && Reg != CP.getDstReg()) + ShrinkRegs.push_back(Reg); + } + ErasedInstrs.insert(MI); + DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + break; + } + default: + break; + } + } +} + +void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, + LaneBitmask LaneMask, + const CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, + NewVNInfo, CP, LIS, TRI, true, true); + + // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) + // We should be able to resolve all conflicts here as we could successfully do + // it on the mainrange already. There is however a problem when multiple + // ranges get mapped to the "overflow" lane mask bit which creates unexpected + // interferences. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { + // We already determined that it is legal to merge the intervals, so this + // should never fail. + llvm_unreachable("*** Couldn't join subrange!\n"); + } + if (!LHSVals.resolveConflicts(RHSVals) || + !RHSVals.resolveConflicts(LHSVals)) { + // We already determined that it is legal to merge the intervals, so this + // should never fail. 
+ llvm_unreachable("*** Couldn't join subrange!\n"); + } + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints, false); + RHSVals.pruneValues(LHSVals, EndPoints, false); + + LHSVals.removeImplicitDefs(); + RHSVals.removeImplicitDefs(); + + LRange.verify(); + RRange.verify(); + + // Join RRange into LHS. + LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), + NewVNInfo); + + DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); + if (EndPoints.empty()) + return; + + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LRange << '\n'); + LIS->extendToIndices(LRange, EndPoints); +} + +void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, + const LiveRange &ToMerge, + LaneBitmask LaneMask, + CoalescerPair &CP) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + for (LiveInterval::SubRange &R : LI.subranges()) { + LaneBitmask RMask = R.LaneMask; + // LaneMask of subregisters common to subrange R and ToMerge. + LaneBitmask Common = RMask & LaneMask; + // There is nothing to do without common subregs. + if (Common == 0) + continue; + + DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into " + << PrintLaneMask(Common) << '\n'); + // LaneMask of subregisters contained in the R range but not in ToMerge, + // they have to split into their own subrange. + LaneBitmask LRest = RMask & ~LaneMask; + LiveInterval::SubRange *CommonRange; + if (LRest != 0) { + R.LaneMask = LRest; + DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n'); + // Duplicate SubRange for newly merged common stuff. + CommonRange = LI.createSubRangeFrom(Allocator, Common, R); + } else { + // Reuse the existing range. + R.LaneMask = Common; + CommonRange = &R; + } + LiveRange RangeCopy(ToMerge, Allocator); + joinSubRegRanges(*CommonRange, RangeCopy, Common, CP); + LaneMask &= ~RMask; + } + + if (LaneMask != 0) { + DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n'); + LI.createSubRangeFrom(Allocator, LaneMask, ToMerge); + } +} + +bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { + SmallVector<VNInfo*, 16> NewVNInfo; + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); + bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); + JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); + JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS, + TRI, false, TrackSubRegLiveness); + + DEBUG(dbgs() << "\t\tRHS = " << RHS + << "\n\t\tLHS = " << LHS + << '\n'); + + // First compute NewVNInfo and the simple value mappings. + // Detect impossible conflicts early. + if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) + return false; + + // Some conflicts can only be resolved after all values have been mapped. + if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) + return false; + + // All clear, the live ranges can be merged. 
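+  // If the copy used subregister indices, e.g. (indices hypothetical)
+  //
+  //   %dst:dsub0 = COPY %src
+  //
+  // the subrange handling below first rewrites all lanemasks into the lane
+  // numbering of the coalesced register: LHS masks are composed with DstIdx
+  // and RHS masks with SrcIdx before the subranges are paired up and merged.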
+ if (RHS.hasSubRanges() || LHS.hasSubRanges()) { + BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); + + // Transform lanemasks from the LHS to masks in the coalesced register and + // create initial subranges if necessary. + unsigned DstIdx = CP.getDstIdx(); + if (!LHS.hasSubRanges()) { + LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(DstIdx); + // LHS must support subregs or we wouldn't be in this codepath. + assert(Mask != 0); + LHS.createSubRangeFrom(Allocator, Mask, LHS); + } else if (DstIdx != 0) { + // Transform LHS lanemasks to new register class if necessary. + for (LiveInterval::SubRange &R : LHS.subranges()) { + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); + R.LaneMask = Mask; + } + } + DEBUG(dbgs() << "\t\tLHST = " << PrintReg(CP.getDstReg()) + << ' ' << LHS << '\n'); + + // Determine lanemasks of RHS in the coalesced register and merge subranges. + unsigned SrcIdx = CP.getSrcIdx(); + if (!RHS.hasSubRanges()) { + LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() + : TRI->getSubRegIndexLaneMask(SrcIdx); + mergeSubRangeInto(LHS, RHS, Mask, CP); + } else { + // Pair up subranges and merge. + for (LiveInterval::SubRange &R : RHS.subranges()) { + LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); + mergeSubRangeInto(LHS, R, Mask, CP); + } + } + DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); + + LHSVals.pruneSubRegValues(LHS, ShrinkMask); + RHSVals.pruneSubRegValues(LHS, ShrinkMask); + } + + // The merging algorithm in LiveInterval::join() can't handle conflicting + // value mappings, so we need to remove any live ranges that overlap a + // CR_Replace resolution. Collect a set of end points that can be used to + // restore the live range after joining. + SmallVector<SlotIndex, 8> EndPoints; + LHSVals.pruneValues(RHSVals, EndPoints, true); + RHSVals.pruneValues(LHSVals, EndPoints, true); + + // Erase COPY and IMPLICIT_DEF instructions. This may cause some external + // registers to require trimming. + SmallVector<unsigned, 8> ShrinkRegs; + LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); + while (!ShrinkRegs.empty()) + shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); + + // Join RHS into LHS. + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); + + // Kill flags are going to be wrong if the live ranges were overlapping. + // Eventually, we should simply clear all kill flags when computing live + // ranges. They are reinserted after register allocation. + MRI->clearKillFlags(LHS.reg); + MRI->clearKillFlags(RHS.reg); + + if (!EndPoints.empty()) { + // Recompute the parts of the live range we had to remove because of + // CR_Replace conflicts. + DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() + << " points: " << LHS << '\n'); + LIS->extendToIndices((LiveRange&)LHS, EndPoints); + } + + return true; +} + +bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { + return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); +} + +namespace { +/// Information concerning MBB coalescing priority. +struct MBBPriorityInfo { + MachineBasicBlock *MBB; + unsigned Depth; + bool IsSplit; + + MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) + : MBB(mbb), Depth(depth), IsSplit(issplit) {} +}; +} + +/// C-style comparator that sorts first based on the loop depth of the basic +/// block (the unsigned), and then on the MBB number. 
+/// +/// EnableGlobalCopies assumes that the primary sort key is loop depth. +static int compareMBBPriority(const MBBPriorityInfo *LHS, + const MBBPriorityInfo *RHS) { + // Deeper loops first + if (LHS->Depth != RHS->Depth) + return LHS->Depth > RHS->Depth ? -1 : 1; + + // Try to unsplit critical edges next. + if (LHS->IsSplit != RHS->IsSplit) + return LHS->IsSplit ? -1 : 1; + + // Prefer blocks that are more connected in the CFG. This takes care of + // the most difficult copies first while intervals are short. + unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); + unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); + if (cl != cr) + return cl > cr ? -1 : 1; + + // As a last resort, sort by block number. + return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1; +} + +/// \returns true if the given copy uses or defines a local live range. +static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { + if (!Copy->isCopy()) + return false; + + if (Copy->getOperand(1).isUndef()) + return false; + + unsigned SrcReg = Copy->getOperand(1).getReg(); + unsigned DstReg = Copy->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) + || TargetRegisterInfo::isPhysicalRegister(DstReg)) + return false; + + return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) + || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); +} + +bool RegisterCoalescer:: +copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { + bool Progress = false; + for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { + if (!CurrList[i]) + continue; + // Skip instruction pointers that have already been erased, for example by + // dead code elimination. + if (ErasedInstrs.erase(CurrList[i])) { + CurrList[i] = nullptr; + continue; + } + bool Again = false; + bool Success = joinCopy(CurrList[i], Again); + Progress |= Success; + if (Success || !Again) + CurrList[i] = nullptr; + } + return Progress; +} + +/// Check if DstReg is a terminal node. +/// I.e., it does not have any affinity other than \p Copy. +static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, + const MachineRegisterInfo *MRI) { + assert(Copy.isCopyLike()); + // Check if the destination of this copy as any other affinity. + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) + if (&MI != &Copy && MI.isCopyLike()) + return false; + return true; +} + +bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { + assert(Copy.isCopyLike()); + if (!UseTerminalRule) + return false; + unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; + isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); + // Check if the destination of this copy has any other affinity. + if (TargetRegisterInfo::isPhysicalRegister(DstReg) || + // If SrcReg is a physical register, the copy won't be coalesced. + // Ignoring it may have other side effect (like missing + // rematerialization). So keep it. + TargetRegisterInfo::isPhysicalRegister(SrcReg) || + !isTerminalReg(DstReg, Copy, MRI)) + return false; + + // DstReg is a terminal node. Check if it interferes with any other + // copy involving SrcReg. + const MachineBasicBlock *OrigBB = Copy.getParent(); + const LiveInterval &DstLI = LIS->getInterval(DstReg); + for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { + // Technically we should check if the weight of the new copy is + // interesting compared to the other one and update the weight + // of the copies accordingly. 
However, this would only work if
+    // we would gather all the copies first then coalesce, whereas
+    // right now we interleave both actions.
+    // For now, just consider the copies that are in the same block.
+    if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
+      continue;
+    unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
+    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+                OtherSubReg);
+    if (OtherReg == SrcReg)
+      OtherReg = OtherSrcReg;
+    // Check if OtherReg is a non-terminal.
+    if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+        isTerminalReg(OtherReg, MI, MRI))
+      continue;
+    // Check that OtherReg interferes with DstReg.
+    if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
+      DEBUG(dbgs() << "Apply terminal rule for: " << PrintReg(DstReg) << '\n');
+      return true;
+    }
+  }
+  return false;
+}
+
+void
+RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
+  DEBUG(dbgs() << MBB->getName() << ":\n");
+
+  // Collect all copy-like instructions in MBB. Don't start coalescing anything
+  // yet, it might invalidate the iterator.
+  const unsigned PrevSize = WorkList.size();
+  if (JoinGlobalCopies) {
+    SmallVector<MachineInstr*, 2> LocalTerminals;
+    SmallVector<MachineInstr*, 2> GlobalTerminals;
+    // Coalesce copies bottom-up to coalesce local defs before local uses. They
+    // are not inherently easier to resolve, but slightly preferable until we
+    // have local live range splitting. In particular this is required by
+    // cmp+jmp macro fusion.
+    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+         MII != E; ++MII) {
+      if (!MII->isCopyLike())
+        continue;
+      bool ApplyTerminalRule = applyTerminalRule(*MII);
+      if (isLocalCopy(&(*MII), LIS)) {
+        if (ApplyTerminalRule)
+          LocalTerminals.push_back(&(*MII));
+        else
+          LocalWorkList.push_back(&(*MII));
+      } else {
+        if (ApplyTerminalRule)
+          GlobalTerminals.push_back(&(*MII));
+        else
+          WorkList.push_back(&(*MII));
+      }
+    }
+    // Append the copies evicted by the terminal rule at the end of the list.
+    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
+    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
+  }
+  else {
+    SmallVector<MachineInstr*, 2> Terminals;
+    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+         MII != E; ++MII)
+      if (MII->isCopyLike()) {
+        if (applyTerminalRule(*MII))
+          Terminals.push_back(&(*MII));
+        else
+          WorkList.push_back(MII);
+      }
+    // Append the copies evicted by the terminal rule at the end of the list.
+    WorkList.append(Terminals.begin(), Terminals.end());
+  }
+  // Try coalescing the collected copies immediately, and remove the nulls.
+  // This prevents the WorkList from getting too large since most copies are
+  // joinable on the first attempt.
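+  //
+  // CurrList below is just the slice of WorkList appended for this block
+  // (everything past PrevSize). copyCoalesceWorkList() nulls out entries
+  // that were joined or failed for good, and the erase/remove pass then
+  // drops those nulls from WorkList.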
+ MutableArrayRef<MachineInstr*> + CurrList(WorkList.begin() + PrevSize, WorkList.end()); + if (copyCoalesceWorkList(CurrList)) + WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), + (MachineInstr*)nullptr), WorkList.end()); +} + +void RegisterCoalescer::coalesceLocals() { + copyCoalesceWorkList(LocalWorkList); + for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { + if (LocalWorkList[j]) + WorkList.push_back(LocalWorkList[j]); + } + LocalWorkList.clear(); +} + +void RegisterCoalescer::joinAllIntervals() { + DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); + assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); + + std::vector<MBBPriorityInfo> MBBs; + MBBs.reserve(MF->size()); + for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ + MachineBasicBlock *MBB = &*I; + MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), + JoinSplitEdges && isSplitEdge(MBB))); + } + array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); + + // Coalesce intervals in MBB priority order. + unsigned CurrDepth = UINT_MAX; + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + // Try coalescing the collected local copies for deeper loops. + if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { + coalesceLocals(); + CurrDepth = MBBs[i].Depth; + } + copyCoalesceInMBB(MBBs[i].MBB); + } + coalesceLocals(); + + // Joining intervals can allow other intervals to be joined. Iteratively join + // until we make no progress. + while (copyCoalesceWorkList(WorkList)) + /* empty */ ; +} + +void RegisterCoalescer::releaseMemory() { + ErasedInstrs.clear(); + WorkList.clear(); + DeadDefs.clear(); + InflateRegs.clear(); +} + +bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + MRI = &fn.getRegInfo(); + TM = &fn.getTarget(); + const TargetSubtargetInfo &STI = fn.getSubtarget(); + TRI = STI.getRegisterInfo(); + TII = STI.getInstrInfo(); + LIS = &getAnalysis<LiveIntervals>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + Loops = &getAnalysis<MachineLoopInfo>(); + if (EnableGlobalCopies == cl::BOU_UNSET) + JoinGlobalCopies = STI.enableJoinGlobalCopies(); + else + JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + + // The MachineScheduler does not currently require JoinSplitEdges. This will + // either be enabled unconditionally or replaced by a more general live range + // splitting optimization. + JoinSplitEdges = EnableJoinSplits; + + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" + << "********** Function: " << MF->getName() << '\n'); + + if (VerifyCoalescing) + MF->verify(this, "Before register coalescing"); + + RegClassInfo.runOnMachineFunction(fn); + + // Join (coalesce) intervals if requested. + if (EnableJoining) + joinAllIntervals(); + + // After deleting a lot of copies, register classes may be less constrained. + // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> + // DPR inflation. 
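+  // Sort and unique the collected candidates so each virtual register is
+  // re-examined only once by recomputeRegClass() below.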
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end()); + InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), + InflateRegs.end()); + DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); + for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { + unsigned Reg = InflateRegs[i]; + if (MRI->reg_nodbg_empty(Reg)) + continue; + if (MRI->recomputeRegClass(Reg)) { + DEBUG(dbgs() << PrintReg(Reg) << " inflated to " + << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); + ++NumInflated; + + LiveInterval &LI = LIS->getInterval(Reg); + if (LI.hasSubRanges()) { + // If the inflated register class does not support subregisters anymore + // remove the subranges. + if (!MRI->shouldTrackSubRegLiveness(Reg)) { + LI.clearSubRanges(); + } else { +#ifndef NDEBUG + LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); + // If subranges are still supported, then the same subregs + // should still be supported. + for (LiveInterval::SubRange &S : LI.subranges()) { + assert((S.LaneMask & ~MaxMask) == 0); + } +#endif + } + } + } + } + + DEBUG(dump()); + if (VerifyCoalescing) + MF->verify(this, "After register coalescing"); + return true; +} + +void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { + LIS->print(O, m); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.h b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h new file mode 100644 index 0000000..04067a1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.h @@ -0,0 +1,116 @@ +//===-- RegisterCoalescer.h - Register Coalescing Interface -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the abstract interface for register coalescers, +// allowing them to interact with and query register allocators. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_REGISTERCOALESCER_H +#define LLVM_LIB_CODEGEN_REGISTERCOALESCER_H + +namespace llvm { + + class MachineInstr; + class TargetRegisterInfo; + class TargetRegisterClass; + class TargetInstrInfo; + + /// A helper class for register coalescers. When deciding if + /// two registers can be coalesced, CoalescerPair can determine if a copy + /// instruction would become an identity copy after coalescing. + class CoalescerPair { + const TargetRegisterInfo &TRI; + + /// The register that will be left after coalescing. It can be a + /// virtual or physical register. + unsigned DstReg; + + /// The virtual register that will be coalesced into dstReg. + unsigned SrcReg; + + /// The sub-register index of the old DstReg in the new coalesced register. + unsigned DstIdx; + + /// The sub-register index of the old SrcReg in the new coalesced register. + unsigned SrcIdx; + + /// True when the original copy was a partial subregister copy. + bool Partial; + + /// True when both regs are virtual and newRC is constrained. + bool CrossClass; + + /// True when DstReg and SrcReg are reversed from the original + /// copy instruction. + bool Flipped; + + /// The register class of the coalesced register, or NULL if DstReg + /// is a physreg. This register class may be a super-register of both + /// SrcReg and DstReg. 
+ const TargetRegisterClass *NewRC; + + public: + CoalescerPair(const TargetRegisterInfo &tri) + : TRI(tri), DstReg(0), SrcReg(0), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} + + /// Create a CoalescerPair representing a virtreg-to-physreg copy. + /// No need to call setRegisters(). + CoalescerPair(unsigned VirtReg, unsigned PhysReg, + const TargetRegisterInfo &tri) + : TRI(tri), DstReg(PhysReg), SrcReg(VirtReg), DstIdx(0), SrcIdx(0), + Partial(false), CrossClass(false), Flipped(false), NewRC(nullptr) {} + + /// Set registers to match the copy instruction MI. Return + /// false if MI is not a coalescable copy instruction. + bool setRegisters(const MachineInstr*); + + /// Swap SrcReg and DstReg. Return false if swapping is impossible + /// because DstReg is a physical register, or SubIdx is set. + bool flip(); + + /// Return true if MI is a copy instruction that will become + /// an identity copy after coalescing. + bool isCoalescable(const MachineInstr*) const; + + /// Return true if DstReg is a physical register. + bool isPhys() const { return !NewRC; } + + /// Return true if the original copy instruction did not copy + /// the full register, but was a subreg operation. + bool isPartial() const { return Partial; } + + /// Return true if DstReg is virtual and NewRC is a smaller + /// register class than DstReg's. + bool isCrossClass() const { return CrossClass; } + + /// Return true when getSrcReg is the register being defined by + /// the original copy instruction. + bool isFlipped() const { return Flipped; } + + /// Return the register (virtual or physical) that will remain + /// after coalescing. + unsigned getDstReg() const { return DstReg; } + + /// Return the virtual register that will be coalesced away. + unsigned getSrcReg() const { return SrcReg; } + + /// Return the subregister index that DstReg will be coalesced into, or 0. + unsigned getDstIdx() const { return DstIdx; } + + /// Return the subregister index that SrcReg will be coalesced into, or 0. + unsigned getSrcIdx() const { return SrcIdx; } + + /// Return the register class of the coalesced register. + const TargetRegisterClass *getNewRC() const { return NewRC; } + }; +} // End llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp new file mode 100644 index 0000000..3749b1d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp @@ -0,0 +1,1014 @@ +//===-- RegisterPressure.cpp - Dynamic Register Pressure ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the RegisterPressure class which can be used to track +// MachineInstr level register pressure. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// Increase pressure for each pressure set provided by TargetRegisterInfo. 
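+/// A register unit usually belongs to several overlapping pressure sets, so
+/// the unit's weight is added to every set the PSetIterator visits.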
+static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure, + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) + CurrSetPressure[*PSetI] += Weight; +} + +/// Decrease pressure for each pressure set provided by TargetRegisterInfo. +static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSetI] -= Weight; + } +} + +LLVM_DUMP_METHOD +void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, + const TargetRegisterInfo *TRI) { + bool Empty = true; + for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { + if (SetPressure[i] != 0) { + dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + Empty = false; + } + } + if (Empty) + dbgs() << "\n"; +} + +LLVM_DUMP_METHOD +void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { + dbgs() << "Max Pressure: "; + dumpRegSetPressure(MaxSetPressure, TRI); + dbgs() << "Live In: "; + for (unsigned Reg : LiveInRegs) + dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + dbgs() << '\n'; + dbgs() << "Live Out: "; + for (unsigned Reg : LiveOutRegs) + dbgs() << PrintVRegOrUnit(Reg, TRI) << " "; + dbgs() << '\n'; +} + +LLVM_DUMP_METHOD +void RegPressureTracker::dump() const { + if (!isTopClosed() || !isBottomClosed()) { + dbgs() << "Curr Pressure: "; + dumpRegSetPressure(CurrSetPressure, TRI); + } + P.dump(TRI); +} + +void PressureDiff::dump(const TargetRegisterInfo &TRI) const { + const char *sep = ""; + for (const PressureChange &Change : *this) { + if (!Change.isValid()) + break; + dbgs() << sep << TRI.getRegPressureSetName(Change.getPSet()) + << " " << Change.getUnitInc(); + sep = " "; + } + dbgs() << '\n'; +} + +/// Increase the current pressure as impacted by these registers and bump +/// the high water mark if needed. +void RegPressureTracker::increaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned RegUnit : RegUnits) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + P.MaxSetPressure[*PSetI] = + std::max(P.MaxSetPressure[*PSetI], CurrSetPressure[*PSetI]); + } + } +} + +/// Simply decrease the current pressure as impacted by these registers. +void RegPressureTracker::decreaseRegPressure(ArrayRef<unsigned> RegUnits) { + for (unsigned RegUnit : RegUnits) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnit)); +} + +/// Clear the result so it can be used for another round of pressure tracking. +void IntervalPressure::reset() { + TopIdx = BottomIdx = SlotIndex(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// Clear the result so it can be used for another round of pressure tracking. +void RegionPressure::reset() { + TopPos = BottomPos = MachineBasicBlock::const_iterator(); + MaxSetPressure.clear(); + LiveInRegs.clear(); + LiveOutRegs.clear(); +} + +/// If the current top is not less than or equal to the next index, open it. +/// We happen to need the SlotIndex for the next top for pressure update. +void IntervalPressure::openTop(SlotIndex NextTop) { + if (TopIdx <= NextTop) + return; + TopIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current top is the previous instruction (before receding), open it. 
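+/// ("Opening" a boundary forgets the recorded position and its live register
+/// list; both are recomputed the next time the region is closed.)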
+void RegionPressure::openTop(MachineBasicBlock::const_iterator PrevTop) { + if (TopPos != PrevTop) + return; + TopPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +/// If the current bottom is not greater than the previous index, open it. +void IntervalPressure::openBottom(SlotIndex PrevBottom) { + if (BottomIdx > PrevBottom) + return; + BottomIdx = SlotIndex(); + LiveInRegs.clear(); +} + +/// If the current bottom is the previous instr (before advancing), open it. +void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { + if (BottomPos != PrevBottom) + return; + BottomPos = MachineBasicBlock::const_iterator(); + LiveInRegs.clear(); +} + +void LiveRegSet::init(const MachineRegisterInfo &MRI) { + const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); + unsigned NumRegUnits = TRI.getNumRegs(); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + Regs.setUniverse(NumRegUnits + NumVirtRegs); + this->NumRegUnits = NumRegUnits; +} + +void LiveRegSet::clear() { + Regs.clear(); +} + +static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return &LIS.getInterval(Reg); + return LIS.getCachedRegUnit(Reg); +} + +void RegPressureTracker::reset() { + MBB = nullptr; + LIS = nullptr; + + CurrSetPressure.clear(); + LiveThruPressure.clear(); + P.MaxSetPressure.clear(); + + if (RequireIntervals) + static_cast<IntervalPressure&>(P).reset(); + else + static_cast<RegionPressure&>(P).reset(); + + LiveRegs.clear(); + UntiedDefs.clear(); +} + +/// Setup the RegPressureTracker. +/// +/// TODO: Add support for pressure without LiveIntervals. +void RegPressureTracker::init(const MachineFunction *mf, + const RegisterClassInfo *rci, + const LiveIntervals *lis, + const MachineBasicBlock *mbb, + MachineBasicBlock::const_iterator pos, + bool ShouldTrackUntiedDefs) +{ + reset(); + + MF = mf; + TRI = MF->getSubtarget().getRegisterInfo(); + RCI = rci; + MRI = &MF->getRegInfo(); + MBB = mbb; + TrackUntiedDefs = ShouldTrackUntiedDefs; + + if (RequireIntervals) { + assert(lis && "IntervalPressure requires LiveIntervals"); + LIS = lis; + } + + CurrPos = pos; + CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); + + P.MaxSetPressure = CurrSetPressure; + + LiveRegs.init(*MRI); + if (TrackUntiedDefs) + UntiedDefs.setUniverse(MRI->getNumVirtRegs()); +} + +/// Does this pressure result have a valid top position and live ins. +bool RegPressureTracker::isTopClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).TopIdx.isValid(); + return (static_cast<RegionPressure&>(P).TopPos == + MachineBasicBlock::const_iterator()); +} + +/// Does this pressure result have a valid bottom position and live outs. +bool RegPressureTracker::isBottomClosed() const { + if (RequireIntervals) + return static_cast<IntervalPressure&>(P).BottomIdx.isValid(); + return (static_cast<RegionPressure&>(P).BottomPos == + MachineBasicBlock::const_iterator()); +} + + +SlotIndex RegPressureTracker::getCurrSlot() const { + MachineBasicBlock::const_iterator IdxPos = CurrPos; + while (IdxPos != MBB->end() && IdxPos->isDebugValue()) + ++IdxPos; + if (IdxPos == MBB->end()) + return LIS->getMBBEndIdx(MBB); + return LIS->getInstructionIndex(IdxPos).getRegSlot(); +} + +/// Set the boundary for the top of the region and summarize live ins. 
+void RegPressureTracker::closeTop() { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).TopIdx = getCurrSlot(); + else + static_cast<RegionPressure&>(P).TopPos = CurrPos; + + assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); + P.LiveInRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveInRegs); +} + +/// Set the boundary for the bottom of the region and summarize live outs. +void RegPressureTracker::closeBottom() { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).BottomIdx = getCurrSlot(); + else + static_cast<RegionPressure&>(P).BottomPos = CurrPos; + + assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); + P.LiveOutRegs.reserve(LiveRegs.size()); + LiveRegs.appendTo(P.LiveOutRegs); +} + +/// Finalize the region boundaries and record live ins and live outs. +void RegPressureTracker::closeRegion() { + if (!isTopClosed() && !isBottomClosed()) { + assert(LiveRegs.size() == 0 && "no region boundary"); + return; + } + if (!isBottomClosed()) + closeBottom(); + else if (!isTopClosed()) + closeTop(); + // If both top and bottom are closed, do nothing. +} + +/// The register tracker is unaware of global liveness so ignores normal +/// live-thru ranges. However, two-address or coalesced chains can also lead +/// to live ranges with no holes. Count these to inform heuristics that we +/// can never drop below this pressure. +void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { + LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); + assert(isBottomClosed() && "need bottom-up tracking to intialize."); + for (unsigned Reg : P.LiveOutRegs) { + if (TargetRegisterInfo::isVirtualRegister(Reg) + && !RPTracker.hasUntiedDef(Reg)) { + increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); + } + } +} + +/// \brief Convenient wrapper for checking membership in RegisterOperands. +/// (std::count() doesn't have an early exit). +static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) { + return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); +} + +namespace { + +/// List of register defined and used by a machine instruction. +class RegisterOperands { +public: + SmallVector<unsigned, 8> Uses; + SmallVector<unsigned, 8> Defs; + SmallVector<unsigned, 8> DeadDefs; + + void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, bool IgnoreDead = false); + + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); +}; + +/// Collect this instruction's unique uses and defs into SmallVectors for +/// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers +class RegisterOperandsCollector { + RegisterOperands &RegOpers; + const TargetRegisterInfo &TRI; + const MachineRegisterInfo &MRI; + bool IgnoreDead; + + RegisterOperandsCollector(RegisterOperands &RegOpers, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) + : RegOpers(RegOpers), TRI(TRI), MRI(MRI), IgnoreDead(IgnoreDead) {} + + void collectInstr(const MachineInstr &MI) const { + for (ConstMIBundleOperands OperI(&MI); OperI.isValid(); ++OperI) + collectOperand(*OperI); + + // Remove redundant physreg dead defs. 
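+    // (A register unit can land both in Defs and in DeadDefs when a single
+    //  instruction has overlapping physreg operands, e.g. a live def of a
+    //  super-register alongside a dead def of one of its parts; in that case
+    //  keep only the live def.)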
+ SmallVectorImpl<unsigned>::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); + } + + /// Push this operand's register onto the correct vectors. + void collectOperand(const MachineOperand &MO) const { + if (!MO.isReg() || !MO.getReg()) + return; + unsigned Reg = MO.getReg(); + if (MO.readsReg()) + pushRegUnits(Reg, RegOpers.Uses); + if (MO.isDef()) { + if (MO.isDead()) { + if (!IgnoreDead) + pushRegUnits(Reg, RegOpers.DeadDefs); + } else + pushRegUnits(Reg, RegOpers.Defs); + } + } + + void pushRegUnits(unsigned Reg, SmallVectorImpl<unsigned> &RegUnits) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (containsReg(RegUnits, Reg)) + return; + RegUnits.push_back(Reg); + } else if (MRI.isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (containsReg(RegUnits, *Units)) + continue; + RegUnits.push_back(*Units); + } + } + } + + friend class RegisterOperands; +}; + +void RegisterOperands::collect(const MachineInstr &MI, + const TargetRegisterInfo &TRI, + const MachineRegisterInfo &MRI, + bool IgnoreDead) { + RegisterOperandsCollector Collector(*this, TRI, MRI, IgnoreDead); + Collector.collectInstr(MI); +} + +void RegisterOperands::detectDeadDefs(const MachineInstr &MI, + const LiveIntervals &LIS) { + SlotIndex SlotIdx = LIS.getInstructionIndex(&MI); + for (SmallVectorImpl<unsigned>::iterator RI = Defs.begin(); + RI != Defs.end(); /*empty*/) { + unsigned Reg = *RI; + const LiveRange *LR = getLiveRange(LIS, Reg); + if (LR != nullptr) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isDeadDef()) { + // LiveIntervals knows this is a dead even though it's MachineOperand is + // not flagged as such. + DeadDefs.push_back(Reg); + RI = Defs.erase(RI); + continue; + } + } + ++RI; + } +} + +} // namespace + +/// Initialize an array of N PressureDiffs. +void PressureDiffs::init(unsigned N) { + Size = N; + if (N <= Max) { + memset(PDiffArray, 0, N * sizeof(PressureDiff)); + return; + } + Max = Size; + free(PDiffArray); + PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff))); +} + +/// Add a change in pressure to the pressure diff of a given instruction. +void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, + const MachineRegisterInfo *MRI) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + // Find an existing entry in the pressure diff for this PSet. + PressureDiff::iterator I = nonconst_begin(), E = nonconst_end(); + for (; I != E && I->isValid(); ++I) { + if (I->getPSet() >= *PSetI) + break; + } + // If all pressure sets are more constrained, skip the remaining PSets. + if (I == E) + break; + // Insert this PressureChange. + if (!I->isValid() || I->getPSet() != *PSetI) { + PressureChange PTmp = PressureChange(*PSetI); + for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J) + std::swap(*J, PTmp); + } + // Update the units for this pressure set. + unsigned NewUnitInc = I->getUnitInc() + Weight; + if (NewUnitInc != 0) { + I->setUnitInc(NewUnitInc); + } else { + // Remove entry + PressureDiff::iterator J; + for (J = std::next(I); J != E && J->isValid(); ++J, ++I) + *I = *J; + if (J != E) + *I = *J; + } + } +} + +/// Record the pressure difference induced by the given operand list. 
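+/// Defs are recorded as decrements and uses as increments, matching the
+/// bottom-up recede() walk in which these diffs are collected.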
+static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, + const MachineRegisterInfo *MRI) { + assert(!PDiff.begin()->isValid() && "stale PDiff"); + + for (unsigned Reg : RegOpers.Defs) + PDiff.addPressureChange(Reg, true, MRI); + + for (unsigned Reg : RegOpers.Uses) + PDiff.addPressureChange(Reg, false, MRI); +} + +/// Force liveness of registers. +void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) { + for (unsigned Reg : Regs) { + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); + } +} + +/// Add Reg to the live in set and increase max pressure. +void RegPressureTracker::discoverLiveIn(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveInRegs, Reg)) + return; + + // At live in discovery, unconditionally increase the high water mark. + P.LiveInRegs.push_back(Reg); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); +} + +/// Add Reg to the live out set and increase max pressure. +void RegPressureTracker::discoverLiveOut(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveOutRegs, Reg)) + return; + + // At live out discovery, unconditionally increase the high water mark. + P.LiveOutRegs.push_back(Reg); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); +} + +/// Recede across the previous instruction. If LiveUses is provided, record any +/// RegUnits that are made live by the current instruction's uses. This includes +/// registers that are both defined and used by the instruction. If a pressure +/// difference pointer is provided record the changes is pressure caused by this +/// instruction independent of liveness. +void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses, + PressureDiff *PDiff) { + assert(CurrPos != MBB->begin()); + if (!isBottomClosed()) + closeBottom(); + + // Open the top of the region using block iterators. + if (!RequireIntervals && isTopClosed()) + static_cast<RegionPressure&>(P).openTop(CurrPos); + + // Find the previous instruction. + do + --CurrPos; + while (CurrPos != MBB->begin() && CurrPos->isDebugValue()); + assert(!CurrPos->isDebugValue()); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + + // Open the top of the region using slot indexes. + if (RequireIntervals && isTopClosed()) + static_cast<IntervalPressure&>(P).openTop(SlotIdx); + + const MachineInstr &MI = *CurrPos; + RegisterOperands RegOpers; + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); + + if (PDiff) + collectPDiff(*PDiff, RegOpers, MRI); + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); + + // Kill liveness at live defs. + // TODO: consider earlyclobbers? + for (unsigned Reg : RegOpers.Defs) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); + } + + // Generate liveness for uses. + for (unsigned Reg : RegOpers.Uses) { + if (!LiveRegs.contains(Reg)) { + // Adjust liveouts if LiveIntervals are available. 
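+      // A use whose live range is neither killed nor redefined at this index
+      // extends below the current position, so it is recorded as a live-out
+      // of the region.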
+ if (RequireIntervals) { + const LiveRange *LR = getLiveRange(*LIS, Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (!LRQ.isKill() && !LRQ.valueDefined()) + discoverLiveOut(Reg); + } + } + increaseRegPressure(Reg); + LiveRegs.insert(Reg); + if (LiveUses && !containsReg(*LiveUses, Reg)) + LiveUses->push_back(Reg); + } + } + if (TrackUntiedDefs) { + for (unsigned Reg : RegOpers.Defs) { + if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) + UntiedDefs.insert(Reg); + } + } +} + +/// Advance across the current instruction. +void RegPressureTracker::advance() { + assert(!TrackUntiedDefs && "unsupported mode"); + + assert(CurrPos != MBB->end()); + if (!isTopClosed()) + closeTop(); + + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = getCurrSlot(); + + // Open the bottom of the region using slot indexes. + if (isBottomClosed()) { + if (RequireIntervals) + static_cast<IntervalPressure&>(P).openBottom(SlotIdx); + else + static_cast<RegionPressure&>(P).openBottom(CurrPos); + } + + RegisterOperands RegOpers; + RegOpers.collect(*CurrPos, *TRI, *MRI); + + for (unsigned Reg : RegOpers.Uses) { + // Discover live-ins. + bool isLive = LiveRegs.contains(Reg); + if (!isLive) + discoverLiveIn(Reg); + // Kill liveness at last uses. + bool lastUse = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(*LIS, Reg); + lastUse = LR && LR->Query(SlotIdx).isKill(); + } else { + // Allocatable physregs are always single-use before register rewriting. + lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); + } + if (lastUse && isLive) { + LiveRegs.erase(Reg); + decreaseRegPressure(Reg); + } else if (!lastUse && !isLive) + increaseRegPressure(Reg); + } + + // Generate liveness for defs. + for (unsigned Reg : RegOpers.Defs) { + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); + } + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); + + // Find the next instruction. + do + ++CurrPos; + while (CurrPos != MBB->end() && CurrPos->isDebugValue()); +} + +/// Find the max change in excess pressure across all sets. +static void computeExcessPressureDelta(ArrayRef<unsigned> OldPressureVec, + ArrayRef<unsigned> NewPressureVec, + RegPressureDelta &Delta, + const RegisterClassInfo *RCI, + ArrayRef<unsigned> LiveThruPressureVec) { + Delta.Excess = PressureChange(); + for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { + unsigned POld = OldPressureVec[i]; + unsigned PNew = NewPressureVec[i]; + int PDiff = (int)PNew - (int)POld; + if (!PDiff) // No change in this set in the common case. + continue; + // Only consider change beyond the limit. + unsigned Limit = RCI->getRegPressureSetLimit(i); + if (!LiveThruPressureVec.empty()) + Limit += LiveThruPressureVec[i]; + + if (Limit > POld) { + if (Limit > PNew) + PDiff = 0; // Under the limit + else + PDiff = PNew - Limit; // Just exceeded limit. + } else if (Limit > PNew) + PDiff = Limit - POld; // Just obeyed limit. + + if (PDiff) { + Delta.Excess = PressureChange(i); + Delta.Excess.setUnitInc(PDiff); + break; + } + } +} + +/// Find the max change in max pressure that either surpasses a critical PSet +/// limit or exceeds the current MaxPressureLimit. +/// +/// FIXME: comparing each element of the old and new MaxPressure vectors here is +/// silly. It's done now to demonstrate the concept but will go away with a +/// RegPressureTracker API change to work with pressure differences. 
+static void computeMaxPressureDelta(ArrayRef<unsigned> OldMaxPressureVec, + ArrayRef<unsigned> NewMaxPressureVec, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit, + RegPressureDelta &Delta) { + Delta.CriticalMax = PressureChange(); + Delta.CurrentMax = PressureChange(); + + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { + unsigned POld = OldMaxPressureVec[i]; + unsigned PNew = NewMaxPressureVec[i]; + if (PNew == POld) // No change in this set in the common case. + continue; + + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (PDiff > 0) { + Delta.CriticalMax = PressureChange(i); + Delta.CriticalMax.setUnitInc(PDiff); + } + } + } + // Find the first increase above MaxPressureLimit. + // (Ignores negative MDiff). + if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) { + Delta.CurrentMax = PressureChange(i); + Delta.CurrentMax.setUnitInc(PNew - POld); + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; + } + } +} + +/// Record the upward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. +/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { + assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + + // Account for register pressure similar to RegPressureTracker::recede(). + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); + assert(RegOpers.DeadDefs.size() == 0); + if (RequireIntervals) + RegOpers.detectDeadDefs(*MI, *LIS); + + // Kill liveness at live defs. + for (unsigned Reg : RegOpers.Defs) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); + } + // Generate liveness for uses. + for (unsigned Reg : RegOpers.Uses) { + if (!LiveRegs.contains(Reg)) + increaseRegPressure(Reg); + } +} + +/// Consider the pressure increase caused by traversing this instruction +/// bottom-up. Find the pressure set with the most change beyond its pressure +/// limit based on the tracker's current pressure, and return the change in +/// number of register units of that pressure set introduced by this +/// instruction. +/// +/// This assumes that the current LiveOut set is sufficient. +/// +/// This is expensive for an on-the-fly query because it calls +/// bumpUpwardPressure to recompute the pressure sets based on current +/// liveness. This mainly exists to verify correctness, e.g. with +/// -verify-misched. getUpwardPressureDelta is the fast version of this query +/// that uses the per-SUnit cache of the PressureDiff. +void RegPressureTracker:: +getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) { + // Snapshot Pressure. + // FIXME: The snapshot heap space should persist. But I'm planning to + // summarize the pressure effect so we don't need to snapshot at all. 
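+  // Sketch of the pattern used below (explanatory, not upstream text):
+  //   1. copy CurrSetPressure and P.MaxSetPressure,
+  //   2. bumpUpwardPressure(MI) speculatively applies MI's defs and uses,
+  //   3. the two compute*PressureDelta helpers diff the old and new vectors,
+  //   4. swapping the saved copies back restores the tracker, so the query
+  //      leaves the current position's state untouched.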
+ std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); + +#ifndef NDEBUG + if (!PDiff) + return; + + // Check if the alternate algorithm yields the same result. + RegPressureDelta Delta2; + getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); + if (Delta != Delta2) { + dbgs() << "PDiff: "; + PDiff->dump(*TRI); + dbgs() << "DELTA: " << *MI; + if (Delta.Excess.isValid()) + dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) + << " " << Delta.Excess.getUnitInc() << "\n"; + if (Delta.CriticalMax.isValid()) + dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet()) + << " " << Delta.CriticalMax.getUnitInc() << "\n"; + if (Delta.CurrentMax.isValid()) + dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet()) + << " " << Delta.CurrentMax.getUnitInc() << "\n"; + if (Delta2.Excess.isValid()) + dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet()) + << " " << Delta2.Excess.getUnitInc() << "\n"; + if (Delta2.CriticalMax.isValid()) + dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet()) + << " " << Delta2.CriticalMax.getUnitInc() << "\n"; + if (Delta2.CurrentMax.isValid()) + dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet()) + << " " << Delta2.CurrentMax.getUnitInc() << "\n"; + llvm_unreachable("RegP Delta Mismatch"); + } +#endif +} + +/// This is the fast version of querying register pressure that does not +/// directly depend on current liveness. +/// +/// @param Delta captures information needed for heuristics. +/// +/// @param CriticalPSets Are the pressure sets that are known to exceed some +/// limit within the region, not necessarily at the current position. +/// +/// @param MaxPressureLimit Is the max pressure within the region, not +/// necessarily at the current position. +void RegPressureTracker:: +getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, + RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) const { + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (PressureDiff::const_iterator + PDiffI = PDiff.begin(), PDiffE = PDiff.end(); + PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) { + + unsigned PSetID = PDiffI->getPSet(); + unsigned Limit = RCI->getRegPressureSetLimit(PSetID); + if (!LiveThruPressure.empty()) + Limit += LiveThruPressure[PSetID]; + + unsigned POld = CurrSetPressure[PSetID]; + unsigned MOld = P.MaxSetPressure[PSetID]; + unsigned MNew = MOld; + // Ignore DeadDefs here because they aren't captured by PressureChange. + unsigned PNew = POld + PDiffI->getUnitInc(); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) + && "PSet overflow/underflow"); + if (PNew > MOld) + MNew = PNew; + // Check if current pressure has exceeded the limit. + if (!Delta.Excess.isValid()) { + unsigned ExcessInc = 0; + if (PNew > Limit) + ExcessInc = POld > Limit ? 
PNew - POld : PNew - Limit; + else if (POld > Limit) + ExcessInc = Limit - POld; + if (ExcessInc) { + Delta.Excess = PressureChange(PSetID); + Delta.Excess.setUnitInc(ExcessInc); + } + } + // Check if max pressure has exceeded a critical pressure set max. + if (MNew == MOld) + continue; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) { + int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (CritInc > 0 && CritInc <= INT16_MAX) { + Delta.CriticalMax = PressureChange(PSetID); + Delta.CriticalMax.setUnitInc(CritInc); + } + } + } + // Check if max pressure has exceeded the current max. + if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) { + Delta.CurrentMax = PressureChange(PSetID); + Delta.CurrentMax.setUnitInc(MNew - MOld); + } + } +} + +/// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). +static bool findUseBetween(unsigned Reg, SlotIndex PriorUseIdx, + SlotIndex NextUseIdx, const MachineRegisterInfo &MRI, + const LiveIntervals *LIS) { + for (const MachineInstr &MI : MRI.use_nodbg_instructions(Reg)) { + SlotIndex InstSlot = LIS->getInstructionIndex(&MI).getRegSlot(); + if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) + return true; + } + return false; +} + +/// Record the downward impact of a single instruction on current register +/// pressure. Unlike the advance/recede pressure tracking interface, this does +/// not discover live in/outs. +/// +/// This is intended for speculative queries. It leaves pressure inconsistent +/// with the current position, so must be restored by the caller. +void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { + assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + + // Account for register pressure similar to RegPressureTracker::recede(). + RegisterOperands RegOpers; + RegOpers.collect(*MI, *TRI, *MRI); + + // Kill liveness at last uses. Assume allocatable physregs are single-use + // rather than checking LiveIntervals. + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + + for (unsigned Reg : RegOpers.Uses) { + if (RequireIntervals) { + // FIXME: allow the caller to pass in the list of vreg uses that remain + // to be bottom-scheduled to avoid searching uses at each query. + SlotIndex CurrIdx = getCurrSlot(); + const LiveRange *LR = getLiveRange(*LIS, Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) + decreaseRegPressure(Reg); + } + } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Allocatable physregs are always single-use before register rewriting. + decreaseRegPressure(Reg); + } + } + + // Generate liveness for defs. + increaseRegPressure(RegOpers.Defs); + + // Boost pressure for all dead defs together. + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); +} + +/// Consider the pressure increase caused by traversing this instruction +/// top-down. Find the register class with the most change in its pressure limit +/// based on the tracker's current pressure, and return the number of excess +/// register units of that pressure set introduced by this instruction. +/// +/// This assumes that the current LiveIn set is sufficient. 
+/// +/// This is expensive for an on-the-fly query because it calls +/// bumpDownwardPressure to recompute the pressure sets based on current +/// liveness. We don't yet have a fast version of downward pressure tracking +/// analogous to getUpwardPressureDelta. +void RegPressureTracker:: +getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, + ArrayRef<PressureChange> CriticalPSets, + ArrayRef<unsigned> MaxPressureLimit) { + // Snapshot Pressure. + std::vector<unsigned> SavedPressure = CurrSetPressure; + std::vector<unsigned> SavedMaxPressure = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); + computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, + MaxPressureLimit, Delta); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); + + // Restore the tracker's state. + P.MaxSetPressure.swap(SavedMaxPressure); + CurrSetPressure.swap(SavedPressure); +} + +/// Get the pressure of each PSet after traversing this instruction bottom-up. +void RegPressureTracker:: +getUpwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpUpwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} + +/// Get the pressure of each PSet after traversing this instruction top-down. +void RegPressureTracker:: +getDownwardPressure(const MachineInstr *MI, + std::vector<unsigned> &PressureResult, + std::vector<unsigned> &MaxPressureResult) { + // Snapshot pressure. + PressureResult = CurrSetPressure; + MaxPressureResult = P.MaxSetPressure; + + bumpDownwardPressure(MI); + + // Current pressure becomes the result. Restore current pressure. + P.MaxSetPressure.swap(MaxPressureResult); + CurrSetPressure.swap(PressureResult); +} diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp new file mode 100644 index 0000000..8fa1bf7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -0,0 +1,444 @@ +//===-- RegisterScavenging.cpp - Machine register scavenging --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the machine register scavenger. It can provide +// information, such as unused registers, at any point in a machine basic block. +// It also provides a mechanism to make registers available by evicting them to +// spill slots. 
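+//
+// Rough usage sketch (illustrative only; the real call sites live in frame
+// lowering and prolog/epilog insertion, not in this file):
+//
+//   RegScavenger RS;
+//   RS.enterBasicBlock(&MBB);                  // seed state from live-ins
+//   RS.forward(MI);                            // track liveness up to MI
+//   unsigned Reg = RS.scavengeRegister(RC, MI, /*SPAdj=*/0);
+//   // ... use Reg as a temporary; a spill/reload pair is inserted if no
+//   // register in RC happens to be free at MI ...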
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "reg-scavenging" + +/// setUsed - Set the register units of this register as used. +void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) { + for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) { + LaneBitmask UnitMask = (*RUI).second; + if (UnitMask == 0 || (LaneMask & UnitMask) != 0) + RegUnitsAvailable.reset((*RUI).first); + } +} + +void RegScavenger::initRegState() { + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = nullptr; + } + + // All register units start out unused. + RegUnitsAvailable.set(); + + if (!MBB) + return; + + // Live-in registers are in use. + for (const auto &LI : MBB->liveins()) + setRegUsed(LI.PhysReg, LI.LaneMask); + + // Pristine CSRs are also unavailable. + const MachineFunction &MF = *MBB->getParent(); + BitVector PR = MF.getFrameInfo()->getPristineRegs(MF); + for (int I = PR.find_first(); I>0; I = PR.find_next(I)) + setRegUsed(I); +} + +void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) { + MachineFunction &MF = *mbb->getParent(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + + assert((NumRegUnits == 0 || NumRegUnits == TRI->getNumRegUnits()) && + "Target changed?"); + + // It is not possible to use the register scavenger after late optimization + // passes that don't preserve accurate liveness information. + assert(MRI->tracksLiveness() && + "Cannot use register scavenger with inaccurate liveness"); + + // Self-initialize. + if (!MBB) { + NumRegUnits = TRI->getNumRegUnits(); + RegUnitsAvailable.resize(NumRegUnits); + KillRegUnits.resize(NumRegUnits); + DefRegUnits.resize(NumRegUnits); + TmpRegUnits.resize(NumRegUnits); + } + + MBB = mbb; + initRegState(); + + Tracking = false; +} + +void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) { + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + BV.set(*RUI); +} + +void RegScavenger::determineKillsAndDefs() { + assert(Tracking && "Must be tracking to determine kills and defs"); + + MachineInstr *MI = MBBI; + assert(!MI->isDebugValue() && "Debug values have no kills or defs"); + + // Find out which registers are early clobbered, killed, defined, and marked + // def-dead in this instruction. + KillRegUnits.reset(); + DefRegUnits.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) { + + TmpRegUnits.clear(); + for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd; ++RU) { + for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) { + if (MO.clobbersPhysReg(*RURI)) { + TmpRegUnits.set(RU); + break; + } + } + } + + // Apply the mask. 
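+      // Explanatory note: a regmask operand (e.g. the preserved-register mask
+      // on a call) clobbers every register whose bit is clear.  The loop above
+      // translated that register-level mask into register units: a unit is
+      // marked in TmpRegUnits if any of its root registers is clobbered.
+      // Clobbered units are then treated exactly like killed operands, so the
+      // scavenger considers them free again after this instruction.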
+ KillRegUnits |= TmpRegUnits; + } + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) + continue; + + if (MO.isUse()) { + // Ignore undef uses. + if (MO.isUndef()) + continue; + if (MO.isKill()) + addRegUnits(KillRegUnits, Reg); + } else { + assert(MO.isDef()); + if (MO.isDead()) + addRegUnits(KillRegUnits, Reg); + else + addRegUnits(DefRegUnits, Reg); + } + } +} + +void RegScavenger::unprocess() { + assert(Tracking && "Cannot unprocess because we're not tracking"); + + MachineInstr *MI = MBBI; + if (!MI->isDebugValue()) { + determineKillsAndDefs(); + + // Commit the changes. + setUsed(KillRegUnits); + setUnused(DefRegUnits); + } + + if (MBBI == MBB->begin()) { + MBBI = MachineBasicBlock::iterator(nullptr); + Tracking = false; + } else + --MBBI; +} + +void RegScavenger::forward() { + // Move ptr forward. + if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); + MBBI = std::next(MBBI); + } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + + MachineInstr *MI = MBBI; + + for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + if (I->Restore != MI) + continue; + + I->Reg = 0; + I->Restore = nullptr; + } + + if (MI->isDebugValue()) + return; + + determineKillsAndDefs(); + + // Verify uses and defs. +#ifndef NDEBUG + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) + continue; + if (MO.isUse()) { + if (MO.isUndef()) + continue; + if (!isRegUsed(Reg)) { + // Check if it's partial live: e.g. + // D0 = insert_subreg D0<undef>, S0 + // ... D0 + // The problem is the insert_subreg could be eliminated. The use of + // D0 is using a partially undef value. This is not *incorrect* since + // S1 is can be freely clobbered. + // Ideally we would like a way to model this, but leaving the + // insert_subreg around causes both correctness and performance issues. + bool SubUsed = false; + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + if (isRegUsed(*SubRegs)) { + SubUsed = true; + break; + } + bool SuperUsed = false; + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid(); ++SR) { + if (isRegUsed(*SR)) { + SuperUsed = true; + break; + } + } + if (!SubUsed && !SuperUsed) { + MBB->getParent()->verify(nullptr, "In Register Scavenger"); + llvm_unreachable("Using an undefined register!"); + } + (void)SubUsed; + (void)SuperUsed; + } + } else { + assert(MO.isDef()); +#if 0 + // FIXME: Enable this once we've figured out how to correctly transfer + // implicit kills during codegen passes like the coalescer. + assert((KillRegs.test(Reg) || isUnused(Reg) || + isLiveInButUnusedBefore(Reg, MI, MBB, TRI, MRI)) && + "Re-defining a live register!"); +#endif + } + } +#endif // NDEBUG + + // Commit the changes. 
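+  // Explanatory note: kills are applied before defs, so a register that is
+  // both read-and-killed and redefined by this instruction ends up marked as
+  // used afterwards.  unprocess() applies the inverse updates (killed units
+  // become used again, defined units become unused) when stepping backwards.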
+ setUnused(KillRegUnits); + setUsed(DefRegUnits); +} + +bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const { + if (includeReserved && isReserved(Reg)) + return true; + for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) + if (!RegUnitsAvailable.test(*RUI)) + return true; + return false; +} + +unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isRegUsed(*I)) { + DEBUG(dbgs() << "Scavenger found unused reg: " << TRI->getName(*I) << + "\n"); + return *I; + } + return 0; +} + +/// getRegsAvailable - Return all available registers in the register class +/// in Mask. +BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { + BitVector Mask(TRI->getNumRegs()); + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isRegUsed(*I)) + Mask.set(*I); + return Mask; +} + +/// findSurvivorReg - Return the candidate register that is unused for the +/// longest after StartMII. UseMI is set to the instruction where the search +/// stopped. +/// +/// No more than InstrLimit instructions are inspected. +/// +unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, + BitVector &Candidates, + unsigned InstrLimit, + MachineBasicBlock::iterator &UseMI) { + int Survivor = Candidates.find_first(); + assert(Survivor > 0 && "No candidates for scavenging"); + + MachineBasicBlock::iterator ME = MBB->getFirstTerminator(); + assert(StartMI != ME && "MI already at terminator"); + MachineBasicBlock::iterator RestorePointMI = StartMI; + MachineBasicBlock::iterator MI = StartMI; + + bool inVirtLiveRange = false; + for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + if (MI->isDebugValue()) { + ++InstrLimit; // Don't count debug instructions + continue; + } + bool isVirtKillInsn = false; + bool isVirtDefInsn = false; + // Remove any candidates touched by instruction. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + Candidates.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg() || MO.isUndef() || !MO.getReg()) + continue; + if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isDef()) + isVirtDefInsn = true; + else if (MO.isKill()) + isVirtKillInsn = true; + continue; + } + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Candidates.reset(*AI); + } + // If we're not in a virtual reg's live range, this is a valid + // restore point. + if (!inVirtLiveRange) RestorePointMI = MI; + + // Update whether we're in the live range of a virtual register + if (isVirtKillInsn) inVirtLiveRange = false; + if (isVirtDefInsn) inVirtLiveRange = true; + + // Was our survivor untouched by this instruction? + if (Candidates.test(Survivor)) + continue; + + // All candidates gone? + if (Candidates.none()) + break; + + Survivor = Candidates.find_first(); + } + // If we ran off the end, that's where we want to restore. + if (MI == ME) RestorePointMI = ME; + assert (RestorePointMI != StartMI && + "No available scavenger restore location!"); + + // We ran out of candidates, so stop the search. 
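+  // Explanatory note: Survivor is the candidate that stayed untouched for the
+  // longest stretch after StartMI, and RestorePointMI is the latest visited
+  // point that is not inside a virtual register's live range, so a reload
+  // inserted there cannot split such a range.  scavengeRegister() below uses
+  // the returned UseMI as the place to reload a spilled scavenged register.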
+ UseMI = RestorePointMI; + return Survivor; +} + +static unsigned getFrameIndexOperandNum(MachineInstr *MI) { + unsigned i = 0; + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + return i; +} + +unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, + MachineBasicBlock::iterator I, + int SPAdj) { + // Consider all allocatable registers in the register class initially + BitVector Candidates = + TRI->getAllocatableSet(*I->getParent()->getParent(), RC); + + // Exclude all the registers being used by the instruction. + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + MachineOperand &MO = I->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) && + !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + Candidates.reset(MO.getReg()); + } + + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + BitVector Available = getRegsAvailable(RC); + Available &= Candidates; + if (Available.any()) + Candidates = Available; + + // Find the register whose use is furthest away. + MachineBasicBlock::iterator UseMI; + unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); + + // If we found an unused register there is no reason to spill it. + if (!isRegUsed(SReg)) { + DEBUG(dbgs() << "Scavenged register: " << TRI->getName(SReg) << "\n"); + return SReg; + } + + // Find an available scavenging slot. + unsigned SI; + for (SI = 0; SI < Scavenged.size(); ++SI) + if (Scavenged[SI].Reg == 0) + break; + + if (SI == Scavenged.size()) { + // We need to scavenge a register but have no spill slot, the target + // must know how to do it (if not, we'll assert below). + Scavenged.push_back(ScavengedInfo()); + } + + // Avoid infinite regress + Scavenged[SI].Reg = SReg; + + // If the target knows how to save/restore the register, let it do so; + // otherwise, use the emergency stack spill slot. + if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { + // Spill the scavenged register before I. + assert(Scavenged[SI].FrameIndex >= 0 && + "Cannot scavenge register without an emergency spill slot!"); + TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, + RC, TRI); + MachineBasicBlock::iterator II = std::prev(I); + + unsigned FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); + + // Restore the scavenged register before its use (or first terminator). + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, + RC, TRI); + II = std::prev(UseMI); + + FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); + } + + Scavenged[SI].Restore = std::prev(UseMI); + + // Doing this here leads to infinite regress. + // Scavenged[SI].Reg = SReg; + + DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << + "\n"); + + return SReg; +} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp new file mode 100644 index 0000000..efde61e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -0,0 +1,641 @@ +//===---- ScheduleDAG.cpp - Implement the ScheduleDAG class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <climits> +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +#ifndef NDEBUG +static cl::opt<bool> StressSchedOpt( + "stress-sched", cl::Hidden, cl::init(false), + cl::desc("Stress test instruction scheduling")); +#endif + +void SchedulingPriorityQueue::anchor() { } + +ScheduleDAG::ScheduleDAG(MachineFunction &mf) + : TM(mf.getTarget()), TII(mf.getSubtarget().getInstrInfo()), + TRI(mf.getSubtarget().getRegisterInfo()), MF(mf), + MRI(mf.getRegInfo()), EntrySU(), ExitSU() { +#ifndef NDEBUG + StressSched = StressSchedOpt; +#endif +} + +ScheduleDAG::~ScheduleDAG() {} + +/// Clear the DAG state (e.g. between scheduling regions). +void ScheduleDAG::clearDAG() { + SUnits.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); +} + +/// getInstrDesc helper to handle SDNodes. +const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { + if (!Node || !Node->isMachineOpcode()) return nullptr; + return &TII->get(Node->getMachineOpcode()); +} + +/// addPred - This adds the specified edge as a pred of the current node if +/// not already. It also adds the current node as a successor of the +/// specified node. +bool SUnit::addPred(const SDep &D, bool Required) { + // If this node already has this dependence, don't add a redundant one. + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + // Zero-latency weak edges may be added purely for heuristic ordering. Don't + // add them if another kind of edge already exists. + if (!Required && I->getSUnit() == D.getSUnit()) + return false; + if (I->overlaps(D)) { + // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). + if (I->getLatency() < D.getLatency()) { + SUnit *PredSU = I->getSUnit(); + // Find the corresponding successor in N. + SDep ForwardD = *I; + ForwardD.setSUnit(this); + for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(), + EE = PredSU->Succs.end(); II != EE; ++II) { + if (*II == ForwardD) { + II->setLatency(D.getLatency()); + break; + } + } + I->setLatency(D.getLatency()); + } + return false; + } + } + // Now add a corresponding succ to N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + // Update the bookkeeping. 
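+  // Explanatory note: NumPreds/NumSuccs count data edges only, while
+  // NumPredsLeft/NumSuccsLeft (and their weak counterparts) count edges to
+  // nodes that have not been scheduled yet; list schedulers decrement the
+  // "left" counters to detect when a node becomes ready.  A nonzero latency
+  // also invalidates the cached depth/height values further down.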
+ if (D.getKind() == SDep::Data) { + assert(NumPreds < UINT_MAX && "NumPreds will overflow!"); + assert(N->NumSuccs < UINT_MAX && "NumSuccs will overflow!"); + ++NumPreds; + ++N->NumSuccs; + } + if (!N->isScheduled) { + if (D.isWeak()) { + ++WeakPredsLeft; + } + else { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); + ++NumPredsLeft; + } + } + if (!isScheduled) { + if (D.isWeak()) { + ++N->WeakSuccsLeft; + } + else { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + ++N->NumSuccsLeft; + } + } + Preds.push_back(D); + N->Succs.push_back(P); + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } + return true; +} + +/// removePred - This removes the specified edge as a pred of the current +/// node if it exists. It also removes the current node as a successor of +/// the specified node. +void SUnit::removePred(const SDep &D) { + // Find the matching predecessor. + for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) + if (*I == D) { + // Find the corresponding successor in N. + SDep P = D; + P.setSUnit(this); + SUnit *N = D.getSUnit(); + SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(), + N->Succs.end(), P); + assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); + N->Succs.erase(Succ); + Preds.erase(I); + // Update the bookkeeping. + if (P.getKind() == SDep::Data) { + assert(NumPreds > 0 && "NumPreds will underflow!"); + assert(N->NumSuccs > 0 && "NumSuccs will underflow!"); + --NumPreds; + --N->NumSuccs; + } + if (!N->isScheduled) { + if (D.isWeak()) + --WeakPredsLeft; + else { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); + --NumPredsLeft; + } + } + if (!isScheduled) { + if (D.isWeak()) + --N->WeakSuccsLeft; + else { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); + --N->NumSuccsLeft; + } + } + if (P.getLatency() != 0) { + this->setDepthDirty(); + N->setHeightDirty(); + } + return; + } +} + +void SUnit::setDepthDirty() { + if (!isDepthCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isDepthCurrent = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), + E = SU->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isDepthCurrent) + WorkList.push_back(SuccSU); + } + } while (!WorkList.empty()); +} + +void SUnit::setHeightDirty() { + if (!isHeightCurrent) return; + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *SU = WorkList.pop_back_val(); + SU->isHeightCurrent = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), + E = SU->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isHeightCurrent) + WorkList.push_back(PredSU); + } + } while (!WorkList.empty()); +} + +/// setDepthToAtLeast - Update this node's successors to reflect the +/// fact that this node's depth just increased. +/// +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= getDepth()) + return; + setDepthDirty(); + Depth = NewDepth; + isDepthCurrent = true; +} + +/// setHeightToAtLeast - Update this node's predecessors to reflect the +/// fact that this node's height just increased. +/// +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= getHeight()) + return; + setHeightDirty(); + Height = NewHeight; + isHeightCurrent = true; +} + +/// ComputeDepth - Calculate the maximal path from the node to the exit. 
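+/// (Clarifying note: the value is accumulated iteratively over predecessor
+/// edges using an explicit worklist; isDepthCurrent memoizes nodes whose
+/// depth is already up to date, so repeated queries only revisit nodes that
+/// were dirtied by setDepthDirty().)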
+/// +void SUnit::ComputeDepth() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxPredDepth = 0; + for (SUnit::const_pred_iterator I = Cur->Preds.begin(), + E = Cur->Preds.end(); I != E; ++I) { + SUnit *PredSU = I->getSUnit(); + if (PredSU->isDepthCurrent) + MaxPredDepth = std::max(MaxPredDepth, + PredSU->Depth + I->getLatency()); + else { + Done = false; + WorkList.push_back(PredSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxPredDepth != Cur->Depth) { + Cur->setDepthDirty(); + Cur->Depth = MaxPredDepth; + } + Cur->isDepthCurrent = true; + } + } while (!WorkList.empty()); +} + +/// ComputeHeight - Calculate the maximal path from the node to the entry. +/// +void SUnit::ComputeHeight() { + SmallVector<SUnit*, 8> WorkList; + WorkList.push_back(this); + do { + SUnit *Cur = WorkList.back(); + + bool Done = true; + unsigned MaxSuccHeight = 0; + for (SUnit::const_succ_iterator I = Cur->Succs.begin(), + E = Cur->Succs.end(); I != E; ++I) { + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isHeightCurrent) + MaxSuccHeight = std::max(MaxSuccHeight, + SuccSU->Height + I->getLatency()); + else { + Done = false; + WorkList.push_back(SuccSU); + } + } + + if (Done) { + WorkList.pop_back(); + if (MaxSuccHeight != Cur->Height) { + Cur->setHeightDirty(); + Cur->Height = MaxSuccHeight; + } + Cur->isHeightCurrent = true; + } + } while (!WorkList.empty()); +} + +void SUnit::biasCriticalPath() { + if (NumPreds < 2) + return; + + SUnit::pred_iterator BestI = Preds.begin(); + unsigned MaxDepth = BestI->getSUnit()->getDepth(); + for (SUnit::pred_iterator I = std::next(BestI), E = Preds.end(); I != E; + ++I) { + if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) + BestI = I; + } + if (BestI != Preds.begin()) + std::swap(*Preds.begin(), *BestI); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or +/// a group of nodes flagged together. 
+void SUnit::dump(const ScheduleDAG *G) const { + dbgs() << "SU(" << NodeNum << "): "; + G->dumpNode(this); +} + +void SUnit::dumpAll(const ScheduleDAG *G) const { + dump(G); + + dbgs() << " # preds left : " << NumPredsLeft << "\n"; + dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + if (WeakPredsLeft) + dbgs() << " # weak preds left : " << WeakPredsLeft << "\n"; + if (WeakSuccsLeft) + dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; + dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; + dbgs() << " Latency : " << Latency << "\n"; + dbgs() << " Depth : " << getDepth() << "\n"; + dbgs() << " Height : " << getHeight() << "\n"; + + if (Preds.size() != 0) { + dbgs() << " Predecessors:\n"; + for (SUnit::const_succ_iterator I = Preds.begin(), E = Preds.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); + dbgs() << "\n"; + } + } + if (Succs.size() != 0) { + dbgs() << " Successors:\n"; + for (SUnit::const_succ_iterator I = Succs.begin(), E = Succs.end(); + I != E; ++I) { + dbgs() << " "; + switch (I->getKind()) { + case SDep::Data: dbgs() << "val "; break; + case SDep::Anti: dbgs() << "anti"; break; + case SDep::Output: dbgs() << "out "; break; + case SDep::Order: dbgs() << "ch "; break; + } + dbgs() << "SU(" << I->getSUnit()->NodeNum << ")"; + if (I->isArtificial()) + dbgs() << " *"; + dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); + dbgs() << "\n"; + } + } +} +#endif + +#ifndef NDEBUG +/// VerifyScheduledDAG - Verify that all SUnits were scheduled and that +/// their state is consistent. Return the number of scheduled nodes. +/// +unsigned ScheduleDAG::VerifyScheduledDAG(bool isBottomUp) { + bool AnyNotSched = false; + unsigned DeadNodes = 0; + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + if (!SUnits[i].isScheduled) { + if (SUnits[i].NumPreds == 0 && SUnits[i].NumSuccs == 0) { + ++DeadNodes; + continue; + } + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has not been scheduled!\n"; + AnyNotSched = true; + } + if (SUnits[i].isScheduled && + (isBottomUp ? SUnits[i].getHeight() : SUnits[i].getDepth()) > + unsigned(INT_MAX)) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has an unexpected " + << (isBottomUp ? "Height" : "Depth") << " value!\n"; + AnyNotSched = true; + } + if (isBottomUp) { + if (SUnits[i].NumSuccsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has successors left!\n"; + AnyNotSched = true; + } + } else { + if (SUnits[i].NumPredsLeft != 0) { + if (!AnyNotSched) + dbgs() << "*** Scheduling failed! ***\n"; + SUnits[i].dump(this); + dbgs() << "has predecessors left!\n"; + AnyNotSched = true; + } + } + } + assert(!AnyNotSched); + return SUnits.size() - DeadNodes; +} +#endif + +/// InitDAGTopologicalSorting - create the initial topological +/// ordering from the DAG to be scheduled. 
+/// +/// The idea of the algorithm is taken from +/// "Online algorithms for managing the topological order of +/// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly +/// This is the MNR algorithm, which was first introduced by +/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in +/// "Maintaining a topological order under edge insertions". +/// +/// Short description of the algorithm: +/// +/// Topological ordering, ord, of a DAG maps each node to a topological +/// index so that for all edges X->Y it is the case that ord(X) < ord(Y). +/// +/// This means that if there is a path from the node X to the node Z, +/// then ord(X) < ord(Z). +/// +/// This property can be used to check for reachability of nodes: +/// if Z is reachable from X, then an insertion of the edge Z->X would +/// create a cycle. +/// +/// The algorithm first computes a topological ordering for the DAG by +/// initializing the Index2Node and Node2Index arrays and then tries to keep +/// the ordering up-to-date after edge insertions by reordering the DAG. +/// +/// On insertion of the edge X->Y, the algorithm first marks by calling DFS +/// the nodes reachable from Y, and then shifts them using Shift to lie +/// immediately after X in Index2Node. +void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { + unsigned DAGSize = SUnits.size(); + std::vector<SUnit*> WorkList; + WorkList.reserve(DAGSize); + + Index2Node.resize(DAGSize); + Node2Index.resize(DAGSize); + + // Initialize the data structures. + if (ExitSU) + WorkList.push_back(ExitSU); + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + int NodeNum = SU->NodeNum; + unsigned Degree = SU->Succs.size(); + // Temporarily use the Node2Index array as scratch space for degree counts. + Node2Index[NodeNum] = Degree; + + // Is it a node without dependencies? + if (Degree == 0) { + assert(SU->Succs.empty() && "SUnit should have no successors"); + // Collect leaf nodes. + WorkList.push_back(SU); + } + } + + int Id = DAGSize; + while (!WorkList.empty()) { + SUnit *SU = WorkList.back(); + WorkList.pop_back(); + if (SU->NodeNum < DAGSize) + Allocate(SU->NodeNum, --Id); + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit *SU = I->getSUnit(); + if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum]) + // If all dependencies of the node are processed already, + // then the node can be computed now. + WorkList.push_back(SU); + } + } + + Visited.resize(DAGSize); + +#ifndef NDEBUG + // Check correctness of the ordering + for (unsigned i = 0, e = DAGSize; i != e; ++i) { + SUnit *SU = &SUnits[i]; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + "Wrong topological sorting"); + } + } +#endif +} + +/// AddPred - Updates the topological ordering to accommodate an edge +/// to be added from SUnit X to SUnit Y. +void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { + int UpperBound, LowerBound; + LowerBound = Node2Index[Y->NodeNum]; + UpperBound = Node2Index[X->NodeNum]; + bool HasLoop = false; + // Is Ord(X) < Ord(Y) ? + if (LowerBound < UpperBound) { + // Update the topological order. + Visited.reset(); + DFS(Y, UpperBound, HasLoop); + assert(!HasLoop && "Inserted edge creates a loop!"); + // Recompute topological indexes. 
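+    // Explanatory note: DFS() above marked every node reachable from Y whose
+    // index lies in the affected window, and Shift() below renumbers the
+    // window so the marked nodes land immediately after X.  Illustrative
+    // example (made-up indices): with X at 5, Y at 2, and {Y, B(4)} reachable
+    // from Y, the window [2,5] is renumbered to A=2, X=3, Y=4, B=5, which
+    // restores ord(X) < ord(Y) for the new edge X->Y.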
+ Shift(Visited, LowerBound, UpperBound); + } +} + +/// RemovePred - Updates the topological ordering to accommodate an +/// an edge to be removed from the specified node N from the predecessors +/// of the current node M. +void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { + // InitDAGTopologicalSorting(); +} + +/// DFS - Make a DFS traversal to mark all nodes reachable from SU and mark +/// all nodes affected by the edge insertion. These nodes will later get new +/// topological indexes by means of the Shift method. +void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, + bool &HasLoop) { + std::vector<const SUnit*> WorkList; + WorkList.reserve(SUnits.size()); + + WorkList.push_back(SU); + do { + SU = WorkList.back(); + WorkList.pop_back(); + Visited.set(SU->NodeNum); + for (int I = SU->Succs.size()-1; I >= 0; --I) { + unsigned s = SU->Succs[I].getSUnit()->NodeNum; + // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). + if (s >= Node2Index.size()) + continue; + if (Node2Index[s] == UpperBound) { + HasLoop = true; + return; + } + // Visit successors if not already and in affected region. + if (!Visited.test(s) && Node2Index[s] < UpperBound) { + WorkList.push_back(SU->Succs[I].getSUnit()); + } + } + } while (!WorkList.empty()); +} + +/// Shift - Renumber the nodes so that the topological ordering is +/// preserved. +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, + int UpperBound) { + std::vector<int> L; + int shift = 0; + int i; + + for (i = LowerBound; i <= UpperBound; ++i) { + // w is node at topological index i. + int w = Index2Node[i]; + if (Visited.test(w)) { + // Unmark. + Visited.reset(w); + L.push_back(w); + shift = shift + 1; + } else { + Allocate(w, i - shift); + } + } + + for (unsigned j = 0; j < L.size(); ++j) { + Allocate(L[j], i - shift); + i = i + 1; + } +} + + +/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will +/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU). +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + // Is SU reachable from TargetSU via successor edges? + if (IsReachable(SU, TargetSU)) + return true; + for (SUnit::pred_iterator + I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I) + if (I->isAssignedRegDep() && + IsReachable(SU, I->getSUnit())) + return true; + return false; +} + +/// IsReachable - Checks if SU is reachable from TargetSU. +bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, + const SUnit *TargetSU) { + // If insertion of the edge SU->TargetSU would create a cycle + // then there is a path from TargetSU to SU. + int UpperBound, LowerBound; + LowerBound = Node2Index[TargetSU->NodeNum]; + UpperBound = Node2Index[SU->NodeNum]; + bool HasLoop = false; + // Is Ord(TargetSU) < Ord(SU) ? + if (LowerBound < UpperBound) { + Visited.reset(); + // There may be a path from TargetSU to SU. Check for it. + DFS(TargetSU, UpperBound, HasLoop); + } + return HasLoop; +} + +/// Allocate - assign the topological index to the node n. 
+void ScheduleDAGTopologicalSort::Allocate(int n, int index) { + Node2Index[n] = index; + Index2Node[index] = n; +} + +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector<SUnit> &sunits, SUnit *exitsu) + : SUnits(sunits), ExitSU(exitsu) {} + +ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp new file mode 100644 index 0000000..fb82ab7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -0,0 +1,1686 @@ +//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAGInstrs class, which implements re-scheduling +// of MachineInstrs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/ADT/IntEqClasses.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/RegisterPressure.h" +#include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <queue> + +using namespace llvm; + +#define DEBUG_TYPE "misched" + +static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Enable use of AA during MI DAG construction")); + +static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, + cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction")); + +ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, + const MachineLoopInfo *mli, + bool RemoveKillFlags) + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), + RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), + TrackLaneMasks(false), FirstDbgValue(nullptr) { + DbgValues.clear(); + + const TargetSubtargetInfo &ST = mf.getSubtarget(); + SchedModel.init(ST.getSchedModel(), &ST, TII); +} + +/// getUnderlyingObjectFromInt - This is the function that does the work of +/// looking through basic ptrtoint+arithmetic+inttoptr sequences. +static const Value *getUnderlyingObjectFromInt(const Value *V) { + do { + if (const Operator *U = dyn_cast<Operator>(V)) { + // If we find a ptrtoint, we can transfer control back to the + // regular getUnderlyingObjectFromInt. + if (U->getOpcode() == Instruction::PtrToInt) + return U->getOperand(0); + // If we find an add of a constant, a multiplied value, or a phi, it's + // likely that the other operand will lead us to the base + // object. 
We don't have to worry about the case where the + // object address is somehow being computed by the multiply, + // because our callers only care when the result is an + // identifiable object. + if (U->getOpcode() != Instruction::Add || + (!isa<ConstantInt>(U->getOperand(1)) && + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && + !isa<PHINode>(U->getOperand(1)))) + return V; + V = U->getOperand(0); + } else { + return V; + } + assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); + } while (1); +} + +/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects +/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. +static void getUnderlyingObjects(const Value *V, + SmallVectorImpl<Value *> &Objects, + const DataLayout &DL) { + SmallPtrSet<const Value *, 16> Visited; + SmallVector<const Value *, 4> Working(1, V); + do { + V = Working.pop_back_val(); + + SmallVector<Value *, 4> Objs; + GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL); + + for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { + V = *I; + if (!Visited.insert(V).second) + continue; + if (Operator::getOpcode(V) == Instruction::IntToPtr) { + const Value *O = + getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); + if (O->getType()->isPointerTy()) { + Working.push_back(O); + continue; + } + } + Objects.push_back(const_cast<Value *>(V)); + } + } while (!Working.empty()); +} + +typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType; +typedef SmallVector<PointerIntPair<ValueType, 1, bool>, 4> +UnderlyingObjectsVector; + +/// getUnderlyingObjectsForInstr - If this machine instr has memory reference +/// information and it can be tracked to a normal reference to a known +/// object, return the Value for that object. +static void getUnderlyingObjectsForInstr(const MachineInstr *MI, + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects, + const DataLayout &DL) { + if (!MI->hasOneMemOperand() || + (!(*MI->memoperands_begin())->getValue() && + !(*MI->memoperands_begin())->getPseudoValue()) || + (*MI->memoperands_begin())->isVolatile()) + return; + + if (const PseudoSourceValue *PSV = + (*MI->memoperands_begin())->getPseudoValue()) { + // Function that contain tail calls don't have unique PseudoSourceValue + // objects. Two PseudoSourceValues might refer to the same or overlapping + // locations. The client code calling this function assumes this is not the + // case. So return a conservative answer of no known object. + if (MFI->hasTailCall()) + return; + + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (!PSV->isAliased(MFI)) { + bool MayAlias = PSV->mayAlias(MFI); + Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias)); + } + return; + } + + const Value *V = (*MI->memoperands_begin())->getValue(); + if (!V) + return; + + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(V, Objs, DL); + + for (Value *V : Objs) { + if (!isIdentifiedObject(V)) { + Objects.clear(); + return; + } + + Objects.push_back(UnderlyingObjectsVector::value_type(V, true)); + } +} + +void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { + BB = bb; +} + +void ScheduleDAGInstrs::finishBlock() { + // Subclasses should no longer refer to the old block. + BB = nullptr; +} + +/// Initialize the DAG and common scheduler state for the current scheduling +/// region. 
This does not actually create the DAG, only clears it. The +/// scheduling driver may call BuildSchedGraph multiple times per scheduling +/// region. +void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, + MachineBasicBlock::iterator begin, + MachineBasicBlock::iterator end, + unsigned regioninstrs) { + assert(bb == BB && "startBlock should set BB"); + RegionBegin = begin; + RegionEnd = end; + NumRegionInstrs = regioninstrs; +} + +/// Close the current scheduling region. Don't clear any state in case the +/// driver wants to refer to the previous scheduling region. +void ScheduleDAGInstrs::exitRegion() { + // Nothing to do. +} + +/// addSchedBarrierDeps - Add dependencies from instructions in the current +/// list of instructions being scheduled to scheduling barrier by adding +/// the exit SU to the register defs and use list. This is because we want to +/// make sure instructions which define registers that are either used by +/// the terminator or are live-out are properly scheduled. This is +/// especially important when the definition latency of the return value(s) +/// are too high to be hidden by the branch or when the liveout registers +/// used by instructions in the fallthrough block. +void ScheduleDAGInstrs::addSchedBarrierDeps() { + MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr; + ExitSU.setInstr(ExitMI); + bool AllDepKnown = ExitMI && + (ExitMI->isCall() || ExitMI->isBarrier()); + if (ExitMI && AllDepKnown) { + // If it's a call or a barrier, add dependencies on the defs and uses of + // instruction. + for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = ExitMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TRI->isPhysicalRegister(Reg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(&ExitSU, i); + } + } else { + // For others, e.g. fallthrough, conditional branch, assume the exit + // uses all the registers that are livein to the successor blocks. + assert(Uses.empty() && "Uses in set before adding deps?"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) + for (const auto &LI : (*SI)->liveins()) { + if (!Uses.contains(LI.PhysReg)) + Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg)); + } + } +} + +/// MO is an operand of SU's instruction that defines a physical register. Add +/// data dependencies from SU to any uses of the physical register. +void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { + const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx); + assert(MO.isDef() && "expect physreg def"); + + // Ask the target if address-backscheduling is desirable, and if so how much. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { + if (!Uses.contains(*Alias)) + continue; + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { + SUnit *UseSU = I->SU; + if (UseSU == SU) + continue; + + // Adjust the dependence latency using operand def/use information, + // then allow the target to perform its own adjustments. + int UseOp = I->OpIdx; + MachineInstr *RegUse = nullptr; + SDep Dep; + if (UseOp < 0) + Dep = SDep(SU, SDep::Artificial); + else { + // Set the hasPhysRegDefs only for physreg defs that have a use within + // the scheduling region. 
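+        // Explanatory note: this branch creates a true data dependence
+        // SU -> UseSU carrying the aliased physical register; its latency is
+        // taken from the machine model via computeOperandLatency() below and
+        // may then be tweaked by the target's adjustSchedDependency() hook.
+        // The UseOp < 0 case above is the pseudo-use attached to ExitSU by
+        // addSchedBarrierDeps(), which only needs an artificial ordering edge.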
+ SU->hasPhysRegDefs = true; + Dep = SDep(SU, SDep::Data, *Alias); + RegUse = UseSU->getInstr(); + } + Dep.setLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); + + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + } +} + +/// addPhysRegDeps - Add register dependencies (data, anti, and output) from +/// this SUnit to following instructions in the same scheduling region that +/// depend the physical register referenced at OperIdx. +void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + + // Optionally add output and anti dependencies. For anti + // dependencies we use a latency of 0 because for a multi-issue + // target we want to allow the defining instruction to issue + // in the same cycle as the using instruction. + // TODO: Using a latency of 1 here for output dependencies assumes + // there's no cost for reusing registers. + SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; + for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); + Alias.isValid(); ++Alias) { + if (!Defs.contains(*Alias)) + continue; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; + if (DefSU == &ExitSU) + continue; + if (DefSU != SU && + (Kind != SDep::Output || !MO.isDead() || + !DefSU->getInstr()->registerDefIsDead(*Alias))) { + if (Kind == SDep::Anti) + DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); + else { + SDep Dep(SU, Kind, /*Reg=*/*Alias); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + } + } + } + } + + if (!MO.isDef()) { + SU->hasPhysRegUses = true; + // Either insert a new Reg2SUnits entry with an empty SUnits list, or + // retrieve the existing SUnits list for this register's uses. + // Push this SUnit on the use list. + Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); + if (RemoveKillFlags) + MO.setIsKill(false); + } + else { + addPhysRegDataDeps(SU, OperIdx); + unsigned Reg = MO.getReg(); + + // clear this register's use list + if (Uses.contains(Reg)) + Uses.eraseAll(Reg); + + if (!MO.isDead()) { + Defs.eraseAll(Reg); + } else if (SU->isCall) { + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } + } + + // Defs are pushed in the order they are visited and never reordered. + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); + } +} + +LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const +{ + unsigned Reg = MO.getReg(); + // No point in tracking lanemasks if we don't have interesting subregisters. 
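+  // Explanatory note with a made-up example: on a target whose 64-bit
+  // registers decompose into two independent 32-bit halves, a full-register
+  // operand returns the class-wide lane mask (both halves), while an operand
+  // with a sub-register index returns only that half's lane bits.  Classes
+  // without disjunct sub-registers simply report "all lanes" (~0u), since
+  // partial tracking would add cost without improving precision.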
+ const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + +/// addVRegDefDeps - Add register output and data dependencies from this SUnit +/// to instructions that occur later in the same scheduling region if they read +/// from or write to the virtual register defined at OperIdx. +/// +/// TODO: Hoist loop induction variable increments. This has to be +/// reevaluated. Generally, IV scheduling should be done before coalescing. +void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a <read-undef> flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. + const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } + + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } + + // Shortcut: Singly defined vregs do not have output/anti dependencies. + if (MRI.hasOneDef(Reg)) + return; + + // Add output dependence to the next nearest defs of this vreg. + // + // Unless this definition is dead, the output dependence should be + // transitively redundant with antidependencies from this definition's + // uses. We're conservative for now until we have a way to guarantee the uses + // are not eliminated sometime during scheduling. The output dependence edge + // is also useful if output latency exceeds def-use latency. + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. 
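+    // Skip the def currently being processed; an instruction never carries an
+    // output dependence on itself.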
+ if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; + } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); +} + +/// addVRegUseDeps - Add a register data dependency if the instruction that +/// defines the virtual register used at OperIdx is mapped to an SUnit. Add a +/// register antidependency from this SUnit to instructions that occur later in +/// the same scheduling region if they write the virtual register. +/// +/// TODO: Handle ExitSU "uses" properly. +void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; + + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); + } +} + +/// Return true if MI is an instruction we are unable to reason about +/// (like a call or something with unmodeled side effects). +static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) { + return MI->isCall() || MI->hasUnmodeledSideEffects() || + (MI->hasOrderedMemoryRef() && + (!MI->mayLoad() || !MI->isInvariantLoad(AA))); +} + +// This MI might have either incomplete info, or known to be unsafe +// to deal with (i.e. volatile object). +static inline bool isUnsafeMemoryObject(MachineInstr *MI, + const MachineFrameInfo *MFI, + const DataLayout &DL) { + if (!MI || MI->memoperands_empty()) + return true; + // We purposefully do no check for hasOneMemOperand() here + // in hope to trigger an assert downstream in order to + // finish implementation. + if ((*MI->memoperands_begin())->isVolatile() || + MI->hasUnmodeledSideEffects()) + return true; + + if ((*MI->memoperands_begin())->getPseudoValue()) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + return true; + } + + const Value *V = (*MI->memoperands_begin())->getValue(); + if (!V) + return true; + + SmallVector<Value *, 4> Objs; + getUnderlyingObjects(V, Objs, DL); + for (Value *V : Objs) { + // Does this pointer refer to a distinct and identifiable object? + if (!isIdentifiedObject(V)) + return true; + } + + return false; +} + +/// This returns true if the two MIs need a chain edge between them. 
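+/// A typical positive case is a store followed by a load that may access the
+/// same memory. When both operands survive the early checks, the final alias
+/// query widens each access to span both offsets (e.g. offsets 8 and 16 with
+/// 4-byte accesses are queried with sizes 4 and 12, respectively).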
+/// If these are not even memory operations, we still may need +/// chain deps between them. The question really is - could +/// these two MIs be reordered during scheduling from memory dependency +/// point of view. +static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, MachineInstr *MIa, + MachineInstr *MIb) { + const MachineFunction *MF = MIa->getParent()->getParent(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + + // Cover a trivial case - no edge is need to itself. + if (MIa == MIb) + return false; + + // Let the target decide if memory accesses cannot possibly overlap. + if ((MIa->mayLoad() || MIa->mayStore()) && + (MIb->mayLoad() || MIb->mayStore())) + if (TII->areMemAccessesTriviallyDisjoint(MIa, MIb, AA)) + return false; + + // FIXME: Need to handle multiple memory operands to support all targets. + if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand()) + return true; + + if (isUnsafeMemoryObject(MIa, MFI, DL) || isUnsafeMemoryObject(MIb, MFI, DL)) + return true; + + // If we are dealing with two "normal" loads, we do not need an edge + // between them - they could be reordered. + if (!MIa->mayStore() && !MIb->mayStore()) + return false; + + // To this point analysis is generic. From here on we do need AA. + if (!AA) + return true; + + MachineMemOperand *MMOa = *MIa->memoperands_begin(); + MachineMemOperand *MMOb = *MIb->memoperands_begin(); + + if (!MMOa->getValue() || !MMOb->getValue()) + return true; + + // The following interface to AA is fashioned after DAGCombiner::isAlias + // and operates with MachineMemOperand offset with some important + // assumptions: + // - LLVM fundamentally assumes flat address spaces. + // - MachineOperand offset can *only* result from legalization and + // cannot affect queries other than the trivial case of overlap + // checking. + // - These offsets never wrap and never step outside + // of allocated objects. + // - There should never be any negative offsets here. + // + // FIXME: Modify API to hide this math from "user" + // FIXME: Even before we go to AA we can reason locally about some + // memory objects. It can save compile time, and possibly catch some + // corner cases not currently covered. + + assert ((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset"); + assert ((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset"); + + int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset()); + int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset; + int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset; + + AliasResult AAResult = + AA->alias(MemoryLocation(MMOa->getValue(), Overlapa, + UseTBAA ? MMOa->getAAInfo() : AAMDNodes()), + MemoryLocation(MMOb->getValue(), Overlapb, + UseTBAA ? MMOb->getAAInfo() : AAMDNodes())); + + return (AAResult != NoAlias); +} + +/// This recursive function iterates over chain deps of SUb looking for +/// "latest" node that needs a chain edge to SUa. +static unsigned iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, SUnit *SUb, + SUnit *ExitSU, unsigned *Depth, + SmallPtrSetImpl<const SUnit *> &Visited) { + if (!SUa || !SUb || SUb == ExitSU) + return *Depth; + + // Remember visited nodes. + if (!Visited.insert(SUb).second) + return *Depth; + // If there is _some_ dependency already in place, do not + // descend any further. 
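+  // (The existing edge already orders SUa before SUb, so adding another chain
+  // edge would be redundant.)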
+ // TODO: Need to make sure that if that dependency got eliminated or ignored + // for any reason in the future, we would not violate DAG topology. + // Currently it does not happen, but makes an implicit assumption about + // future implementation. + // + // Independently, if we encounter node that is some sort of global + // object (like a call) we already have full set of dependencies to it + // and we can stop descending. + if (SUa->isSucc(SUb) || + isGlobalMemoryObject(AA, SUb->getInstr())) + return *Depth; + + // If we do need an edge, or we have exceeded depth budget, + // add that edge to the predecessors chain of SUb, + // and stop descending. + if (*Depth > 200 || + MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { + SUb->addPred(SDep(SUa, SDep::MayAliasMem)); + return *Depth; + } + // Track current depth. + (*Depth)++; + // Iterate over memory dependencies only. + for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end(); + I != E; ++I) + if (I->isNormalMemoryOrBarrier()) + iterateChainSucc(AA, MFI, DL, SUa, I->getSUnit(), ExitSU, Depth, Visited); + return *Depth; +} + +/// This function assumes that "downward" from SU there exist +/// tail/leaf of already constructed DAG. It iterates downward and +/// checks whether SU can be aliasing any node dominated +/// by it. +static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SU, SUnit *ExitSU, + std::set<SUnit *> &CheckList, + unsigned LatencyToLoad) { + if (!SU) + return; + + SmallPtrSet<const SUnit*, 16> Visited; + unsigned Depth = 0; + + for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end(); + I != IE; ++I) { + if (SU == *I) + continue; + if (MIsNeedChainEdge(AA, MFI, DL, SU->getInstr(), (*I)->getInstr())) { + SDep Dep(SU, SDep::MayAliasMem); + Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0); + (*I)->addPred(Dep); + } + + // Iterate recursively over all previously added memory chain + // successors. Keep track of visited nodes. + for (SUnit::const_succ_iterator J = (*I)->Succs.begin(), + JE = (*I)->Succs.end(); J != JE; ++J) + if (J->isNormalMemoryOrBarrier()) + iterateChainSucc(AA, MFI, DL, SU, J->getSUnit(), ExitSU, &Depth, + Visited); + } +} + +/// Check whether two objects need a chain edge, if so, add it +/// otherwise remember the rejected SU. +static inline void addChainDependency(AliasAnalysis *AA, + const MachineFrameInfo *MFI, + const DataLayout &DL, SUnit *SUa, + SUnit *SUb, std::set<SUnit *> &RejectList, + unsigned TrueMemOrderLatency = 0, + bool isNormalMemory = false) { + // If this is a false dependency, + // do not add the edge, but remember the rejected node. + if (MIsNeedChainEdge(AA, MFI, DL, SUa->getInstr(), SUb->getInstr())) { + SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); + Dep.setLatency(TrueMemOrderLatency); + SUb->addPred(Dep); + } + else { + // Duplicate entries should be ignored. + RejectList.insert(SUb); + DEBUG(dbgs() << "\tReject chain dep between SU(" + << SUa->NodeNum << ") and SU(" + << SUb->NodeNum << ")\n"); + } +} + +/// Create an SUnit for each real instruction, numbered in top-down topological +/// order. The instruction order A < B, implies that no edge exists from B to A. +/// +/// Map each real instruction to its SUnit. +/// +/// After initSUnits, the SUnits vector cannot be resized and the scheduler may +/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs +/// instead of pointers. 
+/// +/// MachineScheduler relies on initSUnits numbering the nodes by their order in +/// the original instruction list. +void ScheduleDAGInstrs::initSUnits() { + // We'll be allocating one SUnit for each real instruction in the region, + // which is contained within a basic block. + SUnits.reserve(NumRegionInstrs); + + for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SUnit *SU = newSUnit(MI); + MISUnitMap[MI] = SU; + + SU->isCall = MI->isCall(); + SU->isCommutable = MI->isCommutable(); + + // Assign the Latency field of SU using target-provided information. + SU->Latency = SchedModel.computeInstrLatency(SU->getInstr()); + + // If this SUnit uses a reserved or unbuffered resource, mark it as such. + // + // Reserved resources block an instruction from issuing and stall the + // entire pipeline. These are identified by BufferSize=0. + // + // Unbuffered resources prevent execution of subsequent instructions that + // require the same resources. This is used for in-order execution pipelines + // within an out-of-order core. These are identified by BufferSize=1. + if (SchedModel.hasInstrSchedModel()) { + const MCSchedClassDesc *SC = getSchedClass(SU); + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) { + case 0: + SU->hasReservedResource = true; + break; + case 1: + SU->isUnbuffered = true; + break; + default: + break; + } + } + } + } +} + +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. + VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + +/// If RegPressure is non-null, compute register pressure as a side effect. The +/// DAG builder is an efficient place to do it because it already visits +/// operands. +void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs, + bool TrackLaneMasks) { + const TargetSubtargetInfo &ST = MF.getSubtarget(); + bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI + : ST.useAA(); + AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + + this->TrackLaneMasks = TrackLaneMasks; + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + + // Create an SUnit for each real instruction. + initSUnits(); + + if (PDiffs) + PDiffs->init(SUnits.size()); + + // We build scheduling units by walking a block's instruction list from bottom + // to top. + + // Remember where a generic side-effecting instruction is as we proceed. + SUnit *BarrierChain = nullptr, *AliasChain = nullptr; + + // Memory references to specific known memory locations are tracked + // so that they can be given more precise dependencies. 
We track + // separately the known memory locations that may alias and those + // that are known not to alias + MapVector<ValueType, std::vector<SUnit *> > AliasMemDefs, NonAliasMemDefs; + MapVector<ValueType, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; + std::set<SUnit*> RejectMemNodes; + + // Remove any stale debug info; sometimes BuildSchedGraph is called again + // without emitting the info from the previous call. + DbgValues.clear(); + FirstDbgValue = nullptr; + + assert(Defs.empty() && Uses.empty() && + "Only BuildGraph should update Defs/Uses"); + Defs.setUniverse(TRI->getNumRegs()); + Uses.setUniverse(TRI->getNumRegs()); + + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + + VRegUses.clear(); + VRegUses.setUniverse(NumVirtRegs); + + // Model data dependencies between instructions being scheduled and the + // ExitSU. + addSchedBarrierDeps(); + + // Walk the list of instructions, from bottom moving up. + MachineInstr *DbgMI = nullptr; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; + MII != MIE; --MII) { + MachineInstr *MI = std::prev(MII); + if (MI && DbgMI) { + DbgValues.push_back(std::make_pair(DbgMI, MI)); + DbgMI = nullptr; + } + + if (MI->isDebugValue()) { + DbgMI = MI; + continue; + } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + + if (RPTracker) { + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr; + RPTracker->recede(/*LiveUses=*/nullptr, PDiff); + assert(RPTracker->getPos() == std::prev(MII) && + "RPTracker can't find MI"); + collectVRegUses(SU); + } + + assert( + (CanHandleTerminators || (!MI->isTerminator() && !MI->isPosition())) && + "Cannot schedule terminators or labels!"); + + // Add register-based dependencies (data, anti, and output). + bool HasVRegDef = false; + for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { + const MachineOperand &MO = MI->getOperand(j); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + + if (TRI->isPhysicalRegister(Reg)) + addPhysRegDeps(SU, j); + else { + if (MO.isDef()) { + HasVRegDef = true; + addVRegDefDeps(SU, j); + } + else if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(SU, j); + } + } + // If we haven't seen any uses in this scheduling region, create a + // dependence edge to ExitSU to model the live-out latency. This is required + // for vreg defs with no in-region use, and prefetches with no vreg def. + // + // FIXME: NumDataSuccs would be more precise than NumSuccs here. This + // check currently relies on being called before adding chain deps. + if (SU->NumSuccs == 0 && SU->Latency > 1 + && (HasVRegDef || MI->mayLoad())) { + SDep Dep(SU, SDep::Artificial); + Dep.setLatency(SU->Latency - 1); + ExitSU.addPred(Dep); + } + + // Add chain dependencies. + // Chain dependencies used to enforce memory order should have + // latency of 0 (except for true dependency of Store followed by + // aliased Load... we estimate that with a single cycle of latency + // assuming the hardware will bypass) + // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable + // after stack slots are lowered to actual addresses. + // TODO: Use an AliasAnalysis and do real alias-analysis queries, and + // produce more precise dependence information. 
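+    // The code below handles three cases in turn: instructions treated as
+    // global memory barriers (calls, unmodeled side effects, ordered memory
+    // accesses), stores, and loads, each updating the alias/non-alias def and
+    // use maps declared above.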
+ unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0; + if (isGlobalMemoryObject(AA, MI)) { + // Be conservative with these and add dependencies on all memory + // references, even those that are known to not alias. + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + I->second[i]->addPred(SDep(SU, SDep::Barrier)); + } + } + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) { + SDep Dep(SU, SDep::Barrier); + Dep.setLatency(TrueMemOrderLatency); + I->second[i]->addPred(Dep); + } + } + // Add SU to the barrier chain. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + BarrierChain = SU; + // This is a barrier event that acts as a pivotal node in the DAG, + // so it is safe to clear list of exposed nodes. + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + RejectMemNodes.clear(); + NonAliasMemDefs.clear(); + NonAliasMemUses.clear(); + + // fall-through + new_alias_chain: + // Chain all possibly aliasing memory references through SU. + if (AliasChain) { + unsigned ChainLatency = 0; + if (AliasChain->getInstr()->mayLoad()) + ChainLatency = TrueMemOrderLatency; + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, + RejectMemNodes, ChainLatency); + } + AliasChain = SU; + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + I->second[i], RejectMemNodes); + } + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + I->second[i], RejectMemNodes, TrueMemOrderLatency); + } + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + PendingLoads.clear(); + AliasMemDefs.clear(); + AliasMemUses.clear(); + } else if (MI->mayStore()) { + // Add dependence on barrier chain, if needed. + // There is no point to check aliasing on barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + + UnderlyingObjectsVector Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); + + if (Objs.empty()) { + // Treat all other stores conservatively. + goto new_alias_chain; + } + + bool MayAlias = false; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + ValueType V = K->getPointer(); + bool ThisMayAlias = K->getInt(); + if (ThisMayAlias) + MayAlias = true; + + // A store to a specific PseudoSourceValue. Add precise dependencies. + // Record the def in MemDefs, first adding a dep if there is + // an existing def. + MapVector<ValueType, std::vector<SUnit *> >::iterator I = + ((ThisMayAlias) ? 
AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector<ValueType, std::vector<SUnit *> >::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) { + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); + + // If we're not using AA, then we only need one store per object. + if (!AAForDep) + I->second.clear(); + I->second.push_back(SU); + } else { + if (ThisMayAlias) { + if (!AAForDep) + AliasMemDefs[V].clear(); + AliasMemDefs[V].push_back(SU); + } else { + if (!AAForDep) + NonAliasMemDefs[V].clear(); + NonAliasMemDefs[V].push_back(SU); + } + } + // Handle the uses in MemUses, if there are any. + MapVector<ValueType, std::vector<SUnit *> >::iterator J = + ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + MapVector<ValueType, std::vector<SUnit *> >::iterator JE = + ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + if (J != JE) { + for (unsigned i = 0, e = J->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + J->second[i], RejectMemNodes, + TrueMemOrderLatency, true); + J->second.clear(); + } + } + if (MayAlias) { + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + // Add dependence on alias chain, if needed. + if (AliasChain) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, + RejectMemNodes); + } + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + } else if (MI->mayLoad()) { + bool MayAlias = true; + if (MI->isInvariantLoad(AA)) { + // Invariant load, no chain dependencies needed! + } else { + UnderlyingObjectsVector Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs, MF.getDataLayout()); + + if (Objs.empty()) { + // A load with no underlying object. Depend on all + // potentially aliasing stores. + for (MapVector<ValueType, std::vector<SUnit *> >::iterator I = + AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + I->second[i], RejectMemNodes); + + PendingLoads.push_back(SU); + MayAlias = true; + } else { + MayAlias = false; + } + + for (UnderlyingObjectsVector::iterator + J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { + ValueType V = J->getPointer(); + bool ThisMayAlias = J->getInt(); + + if (ThisMayAlias) + MayAlias = true; + + // A load from a specific PseudoSourceValue. Add precise dependencies. + MapVector<ValueType, std::vector<SUnit *> >::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector<ValueType, std::vector<SUnit *> >::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + for (unsigned i = 0, e = I->second.size(); i != e; ++i) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, + I->second[i], RejectMemNodes, 0, true); + if (ThisMayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); + } + if (MayAlias) + adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, + RejectMemNodes, /*Latency=*/0); + // Add dependencies on alias and barrier chains, if needed. 
+ if (MayAlias && AliasChain) + addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain, + RejectMemNodes); + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); + } + } + } + if (DbgMI) + FirstDbgValue = DbgMI; + + Defs.clear(); + Uses.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); + PendingLoads.clear(); +} + +/// \brief Initialize register live-range state for updating kills. +void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) { + // Start with no live registers. + LiveRegs.reset(); + + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (const auto &LI : (*SI)->liveins()) { + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +/// \brief If we change a kill flag on the bundle instruction implicit register +/// operands, then we also need to propagate that to any instructions inside +/// the bundle which had the same kill state. +static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg, + bool NewKillState) { + if (MI->getOpcode() != TargetOpcode::BUNDLE) + return; + + // Walk backwards from the last instruction in the bundle to the first. + // Once we set a kill flag on an instruction, we bail out, as otherwise we + // might set it on too many operands. We will clear as many flags as we + // can though. + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (Begin != End) { + for (MachineOperand &MO : (--End)->operands()) { + if (!MO.isReg() || MO.isDef() || Reg != MO.getReg()) + continue; + + // DEBUG_VALUE nodes do not contribute to code generation and should + // always be ignored. Failure to do so may result in trying to modify + // KILL flags on DEBUG_VALUE nodes, which is distressing. + if (MO.isDebug()) + continue; + + // If the register has the internal flag then it could be killing an + // internal def of the register. In this case, just skip. We only want + // to toggle the flag on operands visible outside the bundle. + if (MO.isInternalRead()) + continue; + + if (MO.isKill() == NewKillState) + continue; + MO.setIsKill(NewKillState); + if (NewKillState) + return; + } + } +} + +bool ScheduleDAGInstrs::toggleKillFlag(MachineInstr *MI, MachineOperand &MO) { + // Setting kill flag... + if (!MO.isKill()) { + MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); + return false; + } + + // If MO itself is live, clear the kill flag... + if (LiveRegs.test(MO.getReg())) { + MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); + return false; + } + + // If any subreg of MO is live, then create an imp-def for that + // subreg and keep MO marked as killed. + MO.setIsKill(false); + toggleBundleKillFlag(MI, MO.getReg(), false); + bool AllDead = true; + const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); + for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + MIB.addReg(*SubRegs, RegState::ImplicitDefine); + AllDead = false; + } + } + + if(AllDead) { + MO.setIsKill(true); + toggleBundleKillFlag(MI, MO.getReg(), true); + } + return false; +} + +// FIXME: Reuse the LivePhysRegs utility for this. 
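+/// \brief Re-derive kill flags for MBB. The block is walked bottom-up with a
+/// live-register set: defs clear liveness, stale kill flags are removed, and
+/// each register's last use is marked as a kill. Post-RA schedulers typically
+/// call this once per block after reordering has invalidated the old flags.
+///
+/// A minimal usage sketch (the scheduler object name is assumed):
+/// \code
+///   Scheduler.fixupKills(MBB); // rewrite kill flags after rescheduling MBB
+/// \endcode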
+void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) { + DEBUG(dbgs() << "Fixup kills for BB#" << MBB->getNumber() << '\n'); + + LiveRegs.resize(TRI->getNumRegs()); + BitVector killedRegs(TRI->getNumRegs()); + + startBlockForKills(MBB); + + // Examine block from end to start... + unsigned Count = MBB->size(); + for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin(); + I != E; --Count) { + MachineInstr *MI = --I; + if (MI->isDebugValue()) + continue; + + // Update liveness. Registers that are defed but not used in this + // instruction are now dead. Mark register and all subregs as they + // are completely defined. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isRegMask()) + LiveRegs.clearBitsNotInMask(MO.getRegMask()); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + // Repeat for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.reset(*SubRegs); + } + + // Examine all used registers and set/clear kill flag. When a + // register is used multiple times we only set the kill flag on + // the first use. Don't set kill flags on undef operands. + killedRegs.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + bool kill = false; + if (!killedRegs.test(Reg)) { + kill = true; + // A register is not killed if any subregs are live... + for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + if (LiveRegs.test(*SubRegs)) { + kill = false; + break; + } + } + + // If subreg is not live, then register is killed if it became + // live in this instruction + if (kill) + kill = !LiveRegs.test(Reg); + } + + if (MO.isKill() != kill) { + DEBUG(dbgs() << "Fixing " << MO << " in "); + // Warning: toggleKillFlag may invalidate MO. + toggleKillFlag(MI, MO); + DEBUG(MI->dump()); + DEBUG(if (MI->getOpcode() == TargetOpcode::BUNDLE) { + MachineBasicBlock::instr_iterator Begin = MI->getIterator(); + MachineBasicBlock::instr_iterator End = getBundleEnd(MI); + while (++Begin != End) + DEBUG(Begin->dump()); + }); + } + + killedRegs.set(Reg); + } + + // Mark any used register (that is not using undef) and subregs as + // now live... + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; + unsigned Reg = MO.getReg(); + if ((Reg == 0) || MRI.isReserved(Reg)) continue; + + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) + LiveRegs.set(*SubRegs); + } + } +} + +void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + SU->getInstr()->dump(); +#endif +} + +std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream oss(s); + if (SU == &EntrySU) + oss << "<entry>"; + else if (SU == &ExitSU) + oss << "<exit>"; + else + SU->getInstr()->print(oss, /*SkipOpers=*/true); + return oss.str(); +} + +/// Return the basic block label. It is not necessarilly unique because a block +/// contains multiple scheduling regions. But it is fine for visualization. 
+std::string ScheduleDAGInstrs::getDAGName() const { + return "dag." + BB->getFullName(); +} + +//===----------------------------------------------------------------------===// +// SchedDFSResult Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { +/// \brief Internal state used to compute SchedDFSResult. +class SchedDFSImpl { + SchedDFSResult &R; + + /// Join DAG nodes into equivalence classes by their subtree. + IntEqClasses SubtreeClasses; + /// List PredSU, SuccSU pairs that represent data edges between subtrees. + std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; + + struct RootData { + unsigned NodeID; + unsigned ParentNodeID; // Parent node (member of the parent subtree). + unsigned SubInstrCount; // Instr count in this tree only, not children. + + RootData(unsigned id): NodeID(id), + ParentNodeID(SchedDFSResult::InvalidSubtreeID), + SubInstrCount(0) {} + + unsigned getSparseSetIndex() const { return NodeID; } + }; + + SparseSet<RootData> RootSet; + +public: + SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { + RootSet.setUniverse(R.DFSNodeData.size()); + } + + /// Return true if this node been visited by the DFS traversal. + /// + /// During visitPostorderNode the Node's SubtreeID is assigned to the Node + /// ID. Later, SubtreeID is updated but remains valid. + bool isVisited(const SUnit *SU) const { + return R.DFSNodeData[SU->NodeNum].SubtreeID + != SchedDFSResult::InvalidSubtreeID; + } + + /// Initialize this node's instruction count. We don't need to flag the node + /// visited until visitPostorder because the DAG cannot have cycles. + void visitPreorder(const SUnit *SU) { + R.DFSNodeData[SU->NodeNum].InstrCount = + SU->getInstr()->isTransient() ? 0 : 1; + } + + /// Called once for each node after all predecessors are visited. Revisit this + /// node's predecessors and potentially join them now that we know the ILP of + /// the other predecessors. + void visitPostorderNode(const SUnit *SU) { + // Mark this node as the root of a subtree. It may be joined with its + // successors later. + R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; + RootData RData(SU->NodeNum); + RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; + + // If any predecessors are still in their own subtree, they either cannot be + // joined or are large enough to remain separate. If this parent node's + // total instruction count is not greater than a child subtree by at least + // the subtree limit, then try to join it now since splitting subtrees is + // only useful if multiple high-pressure paths are possible. + unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->getKind() != SDep::Data) + continue; + unsigned PredNum = PI->getSUnit()->NodeNum; + if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) + joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + + // Either link or merge the TreeData entry from the child to the parent. + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { + // If the predecessor's parent is invalid, this is a tree edge and the + // current node is the parent. + if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + } + else if (RootSet.count(PredNum)) { + // The predecessor is not a root, but is still in the root set. 
This + // must be the new parent that it was just joined to. Note that + // RootSet[PredNum].ParentNodeID may either be invalid or may still be + // set to the original parent. + RData.SubInstrCount += RootSet[PredNum].SubInstrCount; + RootSet.erase(PredNum); + } + } + RootSet[SU->NodeNum] = RData; + } + + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSNodeData[Succ->NodeNum].InstrCount + += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); + } + + /// Add a connection for cross edges. + void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { + ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); + } + + /// Set each node's subtree ID to the representative ID and record connections + /// between trees. + void finalize() { + SubtreeClasses.compress(); + R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); + assert(SubtreeClasses.getNumClasses() == RootSet.size() + && "number of roots should match trees"); + for (SparseSet<RootData>::const_iterator + RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { + unsigned TreeID = SubtreeClasses[RI->NodeID]; + if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + // Note that SubInstrCount may be greater than InstrCount if we joined + // subtrees across a cross edge. InstrCount will be attributed to the + // original parent, while SubInstrCount will be attributed to the joined + // parent. + } + R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); + R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); + DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); + for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { + R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; + DEBUG(dbgs() << " SU(" << Idx << ") in tree " + << R.DFSNodeData[Idx].SubtreeID << '\n'); + } + for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator + I = ConnectionPairs.begin(), E = ConnectionPairs.end(); + I != E; ++I) { + unsigned PredTree = SubtreeClasses[I->first->NodeNum]; + unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; + if (PredTree == SuccTree) + continue; + unsigned Depth = I->first->getDepth(); + addConnection(PredTree, SuccTree, Depth); + addConnection(SuccTree, PredTree, Depth); + } + } + +protected: + /// Join the predecessor subtree with the successor that is its DFS + /// parent. Apply some heuristics before joining. + bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, + bool CheckLimit = true) { + assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); + + // Check if the predecessor is already joined. + const SUnit *PredSU = PredDep.getSUnit(); + unsigned PredNum = PredSU->NodeNum; + if (R.DFSNodeData[PredNum].SubtreeID != PredNum) + return false; + + // Four is the magic number of successors before a node is considered a + // pinch point. 
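+    // Count the predecessor's data successors; at four or more it is treated
+    // as a pinch point and left as the root of its own subtree.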
+ unsigned NumDataSucs = 0; + for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), + SE = PredSU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) { + if (++NumDataSucs >= 4) + return false; + } + } + if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) + return false; + R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; + SubtreeClasses.join(Succ->NodeNum, PredNum); + return true; + } + + /// Called by finalize() to record a connection between trees. + void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { + if (!Depth) + return; + + do { + SmallVectorImpl<SchedDFSResult::Connection> &Connections = + R.SubtreeConnections[FromTree]; + for (SmallVectorImpl<SchedDFSResult::Connection>::iterator + I = Connections.begin(), E = Connections.end(); I != E; ++I) { + if (I->TreeID == ToTree) { + I->Level = std::max(I->Level, Depth); + return; + } + } + Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + FromTree = R.DFSTreeData[FromTree].ParentTreeID; + } while (FromTree != SchedDFSResult::InvalidSubtreeID); + } +}; +} // namespace llvm + +namespace { +/// \brief Manage the stack used by a reverse depth-first search over the DAG. +class SchedDAGReverseDFS { + std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; +public: + bool isComplete() const { return DFSStack.empty(); } + + void follow(const SUnit *SU) { + DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); + } + void advance() { ++DFSStack.back().second; } + + const SDep *backtrack() { + DFSStack.pop_back(); + return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second); + } + + const SUnit *getCurr() const { return DFSStack.back().first; } + + SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } + + SUnit::const_pred_iterator getPredEnd() const { + return getCurr()->Preds.end(); + } +}; +} // anonymous + +static bool hasDataSucc(const SUnit *SU) { + for (SUnit::const_succ_iterator + SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) + return true; + } + return false; +} + +/// Compute an ILP metric for all nodes in the subDAG reachable via depth-first +/// search from this root. +void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { + if (!IsBottomUp) + llvm_unreachable("Top-down ILP metric is unimplemnted"); + + SchedDFSImpl Impl(*this); + for (ArrayRef<SUnit>::const_iterator + SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { + const SUnit *SU = &*SI; + if (Impl.isVisited(SU) || hasDataSucc(SU)) + continue; + + SchedDAGReverseDFS DFS; + Impl.visitPreorder(SU); + DFS.follow(SU); + for (;;) { + // Traverse the leftmost path as far as possible. + while (DFS.getPred() != DFS.getPredEnd()) { + const SDep &PredDep = *DFS.getPred(); + DFS.advance(); + // Ignore non-data edges. + if (PredDep.getKind() != SDep::Data + || PredDep.getSUnit()->isBoundaryNode()) { + continue; + } + // An already visited edge is a cross edge, assuming an acyclic DAG. + if (Impl.isVisited(PredDep.getSUnit())) { + Impl.visitCrossEdge(PredDep, DFS.getCurr()); + continue; + } + Impl.visitPreorder(PredDep.getSUnit()); + DFS.follow(PredDep.getSUnit()); + } + // Visit the top of the stack in postorder and backtrack. 
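+      // All predecessors of the current node have been visited: finish it in
+      // postorder, pop it from the stack, and credit its instruction count to
+      // its DFS parent through the tree edge (if any).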
+ const SUnit *Child = DFS.getCurr(); + const SDep *PredDep = DFS.backtrack(); + Impl.visitPostorderNode(Child); + if (PredDep) + Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); + if (DFS.isComplete()) + break; + } + } + Impl.finalize(); +} + +/// The root of the given SubtreeID was just scheduled. For all subtrees +/// connected to this tree, record the depth of the connection so that the +/// nearest connected subtrees can be prioritized. +void SchedDFSResult::scheduleTree(unsigned SubtreeID) { + for (SmallVectorImpl<Connection>::const_iterator + I = SubtreeConnections[SubtreeID].begin(), + E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { + SubtreeConnectLevels[I->TreeID] = + std::max(SubtreeConnectLevels[I->TreeID], I->Level); + DEBUG(dbgs() << " Tree: " << I->TreeID + << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); + } +} + +LLVM_DUMP_METHOD +void ILPValue::print(raw_ostream &OS) const { + OS << InstrCount << " / " << Length << " = "; + if (!Length) + OS << "BADILP"; + else + OS << format("%g", ((double)InstrCount / Length)); +} + +LLVM_DUMP_METHOD +void ILPValue::dump() const { + dbgs() << *this << '\n'; +} + +namespace llvm { + +LLVM_DUMP_METHOD +raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { + Val.print(OS); + return OS; +} + +} // namespace llvm diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp new file mode 100644 index 0000000..1150d26 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -0,0 +1,101 @@ +//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <fstream> +using namespace llvm; + +namespace llvm { + template<> + struct DOTGraphTraits<ScheduleDAG*> : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static std::string getNodeIdentifierLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + std::string R; + raw_string_ostream OS(R); + OS << static_cast<const void *>(Node); + return R; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
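+    /// The default below draws artificial edges dashed cyan and control
+    /// (non-data) edges dashed blue; data edges keep the plain style.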
+ static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(ScheduleDAG *G, + GraphWriter<ScheduleDAG*> &GW) { + return G->addCustomGraphFeatures(GW); + } + }; +} + +std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU, + const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); +} + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAG::viewGraph(const Twine &Name, const Twine &Title) { + // This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAG::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp new file mode 100644 index 0000000..38833a4 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -0,0 +1,249 @@ +//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScoreboardHazardRecognizer class, which +// encapsultes hazard-avoidance heuristics for scheduling, based on the +// scheduling itineraries specified for the target. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType + +#ifndef NDEBUG +const char *ScoreboardHazardRecognizer::DebugType = ""; +#endif + +ScoreboardHazardRecognizer:: +ScoreboardHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *SchedDAG, + const char *ParentDebugType) : + ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0), + IssueCount(0) { + +#ifndef NDEBUG + DebugType = ParentDebugType; +#endif + + // Determine the maximum depth of any itinerary. This determines the depth of + // the scoreboard. We always make the scoreboard at least 1 cycle deep to + // avoid dealing with the boundary condition. 
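+  // The depth is rounded up to the next power of two below, presumably so the
+  // cyclic scoreboard can wrap its indices with a simple mask.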
+ unsigned ScoreboardDepth = 1; + if (ItinData && !ItinData->isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData->isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); + unsigned CurCycle = 0; + unsigned ItinDepth = 0; + for (; IS != E; ++IS) { + unsigned StageDepth = CurCycle + IS->getCycles(); + if (ItinDepth < StageDepth) ItinDepth = StageDepth; + CurCycle += IS->getNextCycles(); + } + + // Find the next power-of-2 >= ItinDepth + while (ItinDepth > ScoreboardDepth) { + ScoreboardDepth *= 2; + // Don't set MaxLookAhead until we find at least one nonzero stage. + // This way, an itinerary with no stages has MaxLookAhead==0, which + // completely bypasses the scoreboard hazard logic. + MaxLookAhead = ScoreboardDepth; + } + } + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + // If MaxLookAhead is not set above, then we are not enabled. + if (!isEnabled()) + DEBUG(dbgs() << "Disabled scoreboard hazard recognizer\n"); + else { + // A nonempty itinerary must have a SchedModel. + IssueWidth = ItinData->SchedModel.IssueWidth; + DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = " + << ScoreboardDepth << '\n'); + } +} + +void ScoreboardHazardRecognizer::Reset() { + IssueCount = 0; + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScoreboardHazardRecognizer::Scoreboard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} +#endif + +bool ScoreboardHazardRecognizer::atIssueLimit() const { + if (IssueWidth == 0) + return false; + + return IssueCount == IssueWidth; +} + +ScheduleHazardRecognizer::HazardType +ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + if (!ItinData || ItinData->isEmpty()) + return NoHazard; + + // Note that stalls will be negative for bottom-up scheduling. + int cycle = Stalls; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. + + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + if (!MCID) { + // Don't check hazards for non-machineinstr Nodes. + return NoHazard; + } + unsigned idx = MCID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + int StageCycle = cycle + (int)i; + if (StageCycle < 0) + continue; + + if (StageCycle >= (int)RequiredScoreboard.getDepth()) { + assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() && + "Scoreboard depth exceeded!"); + // This stage was stalled beyond pipeline depth, so cannot conflict. + break; + } + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[StageCycle]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. 
+ freeUnits &= ~RequiredScoreboard[StageCycle]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle +" << StageCycle << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(DAG->dumpNode(SU)); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { + if (!ItinData || ItinData->isEmpty()) + return; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + const MCInstrDesc *MCID = DAG->getInstrDesc(SU); + assert(MCID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(MCID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + + unsigned idx = MCID->getSchedClass(); + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); +} + +void ScoreboardHazardRecognizer::AdvanceCycle() { + IssueCount = 0; + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); +} + +void ScoreboardHazardRecognizer::RecedeCycle() { + IssueCount = 0; + ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0; + ReservedScoreboard.recede(); + RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0; + RequiredScoreboard.recede(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp new file mode 100644 index 0000000..bc2405b9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -0,0 +1,14796 @@ +//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run +// both before and after the DAG is legalized. +// +// This pass is not a substitute for the LLVM IR instcombine pass. This pass is +// primarily intended to handle simplification opportunities that are implicit +// in the LLVM IR and exposed by the various codegen lowering phases. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "dagcombine" + +STATISTIC(NodesCombined , "Number of dag nodes combined"); +STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); +STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); +STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); +STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); + +namespace { + static cl::opt<bool> + CombinerAA("combiner-alias-analysis", cl::Hidden, + cl::desc("Enable DAG combiner alias-analysis heuristics")); + + static cl::opt<bool> + CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, + cl::desc("Enable DAG combiner's use of IR alias analysis")); + + static cl::opt<bool> + UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), + cl::desc("Enable DAG combiner's use of TBAA")); + +#ifndef NDEBUG + static cl::opt<std::string> + CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, + cl::desc("Only use DAG-combiner alias analysis in this" + " function")); +#endif + + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. + static cl::opt<bool> + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + + static cl::opt<bool> + MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), + cl::desc("DAG combiner may split indexing from loads")); + +//------------------------------ DAGCombiner ---------------------------------// + + class DAGCombiner { + SelectionDAG &DAG; + const TargetLowering &TLI; + CombineLevel Level; + CodeGenOpt::Level OptLevel; + bool LegalOperations; + bool LegalTypes; + bool ForCodeSize; + + /// \brief Worklist of all of the nodes that need to be simplified. + /// + /// This must behave as a stack -- new nodes to process are pushed onto the + /// back and when processing we pop off of the back. + /// + /// The worklist will not contain duplicates but may contain null entries + /// due to nodes being deleted from the underlying DAG. + SmallVector<SDNode *, 64> Worklist; + + /// \brief Mapping from an SDNode to its position on the worklist. + /// + /// This is used to find and remove nodes from the worklist (by nulling + /// them) when they are deleted from the underlying DAG. It relies on + /// stable indices of nodes within the worklist. + DenseMap<SDNode *, unsigned> WorklistMap; + + /// \brief Set of nodes which have been combined (at least once). 
+ /// + /// This is used to allow us to reliably add any operands of a DAG node + /// which have not yet been combined to the worklist. + SmallPtrSet<SDNode *, 64> CombinedNodes; + + // AA - Used for DAG load/store alias analysis. + AliasAnalysis &AA; + + /// When an instruction is simplified, add all users of the instruction to + /// the work lists because they might get more simplified now. + void AddUsersToWorklist(SDNode *N) { + for (SDNode *Node : N->uses()) + AddToWorklist(Node); + } + + /// Call the node-specific routine that folds each particular type of node. + SDValue visit(SDNode *N); + + public: + /// Add to the worklist making sure its instance is at the back (next to be + /// processed.) + void AddToWorklist(SDNode *N) { + // Skip handle nodes as they can't usefully be combined and confuse the + // zero-use deletion strategy. + if (N->getOpcode() == ISD::HANDLENODE) + return; + + if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) + Worklist.push_back(N); + } + + /// Remove all instances of N from the worklist. + void removeFromWorklist(SDNode *N) { + CombinedNodes.erase(N); + + auto It = WorklistMap.find(N); + if (It == WorklistMap.end()) + return; // Not in the worklist. + + // Null out the entry rather than erasing it to avoid a linear operation. + Worklist[It->second] = nullptr; + WorklistMap.erase(It); + } + + void deleteAndRecombine(SDNode *N); + bool recursivelyDeleteUnusedNodes(SDNode *N); + + /// Replaces all uses of the results of one DAG node with new values. + SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo = true); + + /// Replaces all uses of the results of one DAG node with new values. + SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { + return CombineTo(N, &Res, 1, AddTo); + } + + /// Replaces all uses of the results of one DAG node with new values. + SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, + bool AddTo = true) { + SDValue To[] = { Res0, Res1 }; + return CombineTo(N, To, 2, AddTo); + } + + void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); + + private: + + /// Check the specified integer node value to see if it can be simplified or + /// if things it uses can be simplified by bit propagation. + /// If so, return true. + bool SimplifyDemandedBits(SDValue Op) { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + return SimplifyDemandedBits(Op, Demanded); + } + + bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); + + bool CombineToPreIndexedLoadStore(SDNode *N); + bool CombineToPostIndexedLoadStore(SDNode *N); + SDValue SplitIndexingFromLoad(LoadSDNode *LD); + bool SliceUpLoad(SDNode *N); + + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad load that EVE came from to be replaced. + /// \returns EVE on success SDValue() on failure. 
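+ /// For example, (extract_vector_elt (load <4 x i32>, p), 2) may become a
+ /// narrowed i32 load from p plus the offset of element 2.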
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); + void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); + SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); + SDValue SExtPromoteOperand(SDValue Op, EVT PVT); + SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); + SDValue PromoteIntBinOp(SDValue Op); + SDValue PromoteIntShiftOp(SDValue Op); + SDValue PromoteExtend(SDValue Op); + bool PromoteLoad(SDValue Op); + + void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, + ISD::NodeType ExtType); + + /// Call the node-specific routine that knows how to fold each + /// particular type of node. If that doesn't do anything, try the + /// target-specific DAG combines. + SDValue combine(SDNode *N); + + // Visitation implementation - Implement dag node combining for different + // node types. The semantics are as follows: + // Return Value: + // SDValue.getNode() == 0 - No change was made + // SDValue.getNode() == N - N was replaced, is dead and has been handled. + // otherwise - N should be replaced by the returned Operand. + // + SDValue visitTokenFactor(SDNode *N); + SDValue visitMERGE_VALUES(SDNode *N); + SDValue visitADD(SDNode *N); + SDValue visitSUB(SDNode *N); + SDValue visitADDC(SDNode *N); + SDValue visitSUBC(SDNode *N); + SDValue visitADDE(SDNode *N); + SDValue visitSUBE(SDNode *N); + SDValue visitMUL(SDNode *N); + SDValue useDivRem(SDNode *N); + SDValue visitSDIV(SDNode *N); + SDValue visitUDIV(SDNode *N); + SDValue visitREM(SDNode *N); + SDValue visitMULHU(SDNode *N); + SDValue visitMULHS(SDNode *N); + SDValue visitSMUL_LOHI(SDNode *N); + SDValue visitUMUL_LOHI(SDNode *N); + SDValue visitSMULO(SDNode *N); + SDValue visitUMULO(SDNode *N); + SDValue visitIMINMAX(SDNode *N); + SDValue visitAND(SDNode *N); + SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitOR(SDNode *N); + SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); + SDValue visitXOR(SDNode *N); + SDValue SimplifyVBinOp(SDNode *N); + SDValue visitSHL(SDNode *N); + SDValue visitSRA(SDNode *N); + SDValue visitSRL(SDNode *N); + SDValue visitRotate(SDNode *N); + SDValue visitBSWAP(SDNode *N); + SDValue visitCTLZ(SDNode *N); + SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTTZ(SDNode *N); + SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); + SDValue visitCTPOP(SDNode *N); + SDValue visitSELECT(SDNode *N); + SDValue visitVSELECT(SDNode *N); + SDValue visitSELECT_CC(SDNode *N); + SDValue visitSETCC(SDNode *N); + SDValue visitSETCCE(SDNode *N); + SDValue visitSIGN_EXTEND(SDNode *N); + SDValue visitZERO_EXTEND(SDNode *N); + SDValue visitANY_EXTEND(SDNode *N); + SDValue visitSIGN_EXTEND_INREG(SDNode *N); + SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); + SDValue visitTRUNCATE(SDNode *N); + SDValue visitBITCAST(SDNode *N); + SDValue visitBUILD_PAIR(SDNode *N); + SDValue visitFADD(SDNode *N); + SDValue visitFSUB(SDNode *N); + SDValue visitFMUL(SDNode *N); + SDValue visitFMA(SDNode *N); + SDValue visitFDIV(SDNode *N); + SDValue visitFREM(SDNode *N); + SDValue visitFSQRT(SDNode *N); + SDValue visitFCOPYSIGN(SDNode *N); + SDValue visitSINT_TO_FP(SDNode *N); + SDValue visitUINT_TO_FP(SDNode *N); + SDValue visitFP_TO_SINT(SDNode *N); + SDValue visitFP_TO_UINT(SDNode *N); + SDValue visitFP_ROUND(SDNode *N); + SDValue visitFP_ROUND_INREG(SDNode *N); + SDValue visitFP_EXTEND(SDNode *N); + SDValue visitFNEG(SDNode *N); + SDValue visitFABS(SDNode *N); + 
SDValue visitFCEIL(SDNode *N); + SDValue visitFTRUNC(SDNode *N); + SDValue visitFFLOOR(SDNode *N); + SDValue visitFMINNUM(SDNode *N); + SDValue visitFMAXNUM(SDNode *N); + SDValue visitBRCOND(SDNode *N); + SDValue visitBR_CC(SDNode *N); + SDValue visitLOAD(SDNode *N); + + SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); + SDValue replaceStoreOfFPConstant(StoreSDNode *ST); + + SDValue visitSTORE(SDNode *N); + SDValue visitINSERT_VECTOR_ELT(SDNode *N); + SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); + SDValue visitBUILD_VECTOR(SDNode *N); + SDValue visitCONCAT_VECTORS(SDNode *N); + SDValue visitEXTRACT_SUBVECTOR(SDNode *N); + SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitSCALAR_TO_VECTOR(SDNode *N); + SDValue visitINSERT_SUBVECTOR(SDNode *N); + SDValue visitMLOAD(SDNode *N); + SDValue visitMSTORE(SDNode *N); + SDValue visitMGATHER(SDNode *N); + SDValue visitMSCATTER(SDNode *N); + SDValue visitFP_TO_FP16(SDNode *N); + SDValue visitFP16_TO_FP(SDNode *N); + + SDValue visitFADDForFMACombine(SDNode *N); + SDValue visitFSUBForFMACombine(SDNode *N); + SDValue visitFMULForFMACombine(SDNode *N); + + SDValue XformToShuffleWithZero(SDNode *N); + SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); + + SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); + + bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); + SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); + SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue N3, ISD::CondCode CC, + bool NotExtCompare = false); + SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, + SDLoc DL, bool foldBooleans = true); + + bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const; + bool isOneUseSetCC(SDValue N) const; + + SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp); + SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); + SDValue CombineExtLoad(SDNode *N); + SDValue combineRepeatedFPDivisors(SDNode *N); + SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); + SDValue BuildSDIV(SDNode *N); + SDValue BuildSDIVPow2(SDNode *N); + SDValue BuildUDIV(SDNode *N); + SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); + SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, + SDNodeFlags *Flags); + SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits = true); + SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, + SDValue InnerPos, SDValue InnerNeg, + unsigned PosOpcode, unsigned NegOpcode, + SDLoc DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); + SDValue ReduceLoadWidth(SDNode *N); + SDValue ReduceLoadOpStoreWidth(SDNode *N); + SDValue TransformFPLoadStorePair(SDNode *N); + SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); + SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); + + SDValue GetDemandedBits(SDValue V, const APInt &Mask); + + /// Walk up chain skipping non-aliasing memory nodes, + /// looking for aliasing nodes and adding them to the Aliases vector. 
+ void GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVectorImpl<SDValue> &Aliases); + + /// Return true if there is any possibility that the two addresses overlap. + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; + + /// Walk up chain skipping non-aliasing memory nodes, looking for a better + /// chain (aliasing node.) + SDValue FindBetterChain(SDNode *N, SDValue Chain); + + /// Do FindBetterChain for a store and any possibly adjacent stores on + /// consecutive chains. + bool findBetterNeighborChains(StoreSDNode *St); + + /// Holds a pointer to an LSBaseSDNode as well as information on where it + /// is located in a sequence of memory operations connected by a chain. + struct MemOpLink { + MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): + MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } + // Ptr to the mem node. + LSBaseSDNode *MemNode; + // Offset from the base ptr. + int64_t OffsetFromBase; + // What is the sequence number of this mem node. + // Lowest mem operand in the DAG starts at zero. + unsigned SequenceNum; + }; + + /// This is a helper function for visitMUL to check the profitability + /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). + /// MulNode is the original multiply, AddNode is (add x, c1), + /// and ConstNode is c2. + bool isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode); + + /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a + /// constant build_vector of the stored constant values in Stores. + SDValue getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, + EVT Ty) const; + + /// This is a helper function for visitAND and visitZERO_EXTEND. Returns + /// true if the (and (load x) c) pattern matches an extload. ExtVT returns + /// the type of the loaded value to be extended. LoadedVT returns the type + /// of the original loaded value. NarrowLoad returns whether the load would + /// need to be narrowed in order to match. + bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad); + + /// This is a helper function for MergeConsecutiveStores. When the source + /// elements of the consecutive stores are all constants or all extracted + /// vector elements, try to merge them into one larger store. + /// \return True if a merged store was created. + bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, + EVT MemVT, unsigned NumStores, + bool IsConstantSrc, bool UseVector); + + /// This is a helper function for MergeConsecutiveStores. + /// Stores that may be merged are placed in StoreNodes. + /// Loads that may alias with those stores are placed in AliasLoadNodes. + void getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, + SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); + + /// Merge consecutive store operations into a wide store. + /// This optimization uses wide integers or vectors when possible. + /// \return True if some memory operations were changed. + bool MergeConsecutiveStores(StoreSDNode *N); + + /// \brief Try to transform a truncation where C is a constant: + /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) + /// + /// \p N needs to be a truncation and its first operand an AND. Other + /// requirements are checked by the function (e.g. that trunc is + /// single-use) and if missed an empty SDValue is returned. 
+ SDValue distributeTruncateThroughAnd(SDNode *N); + + public: + DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); + } + + /// Runs the dag combiner on all nodes in the work list + void Run(CombineLevel AtLevel); + + SelectionDAG &getDAG() const { return DAG; } + + /// Returns a type large enough to hold any valid shift amount - before type + /// legalization these can be huge. + EVT getShiftAmountTy(EVT LHSTy) { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + auto &DL = DAG.getDataLayout(); + return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) + : TLI.getPointerTy(DL); + } + + /// This method returns true if we are running before type legalization or + /// if the specified VT is legal. + bool isTypeLegal(const EVT &VT) { + if (!LegalTypes) return true; + return TLI.isTypeLegal(VT); + } + + /// Convenience wrapper around TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + } + }; +} + + +namespace { +/// This class is a DAGUpdateListener that removes any deleted +/// nodes from the worklist. +class WorklistRemover : public SelectionDAG::DAGUpdateListener { + DAGCombiner &DC; +public: + explicit WorklistRemover(DAGCombiner &dc) + : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} + + void NodeDeleted(SDNode *N, SDNode *E) override { + DC.removeFromWorklist(N); + } +}; +} + +//===----------------------------------------------------------------------===// +// TargetLowering::DAGCombinerInfo implementation +//===----------------------------------------------------------------------===// + +void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { + ((DAGCombiner*)DC)->AddToWorklist(N); +} + +void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { + ((DAGCombiner*)DC)->removeFromWorklist(N); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); +} + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); +} + + +SDValue TargetLowering::DAGCombinerInfo:: +CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { + return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); +} + +void TargetLowering::DAGCombinerInfo:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); +} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +void DAGCombiner::deleteAndRecombine(SDNode *N) { + removeFromWorklist(N); + + // If the operands of this node are only used by the node, they will now be + // dead. Make sure to re-visit them and recursively delete dead nodes. + for (const SDValue &Op : N->ops()) + // For an operand generating multiple values, one of the values may + // become dead allowing further simplification (e.g. split index + // arithmetic from an indexed load). 
+ if (Op->hasOneUse() || Op->getNumValues() > 1) + AddToWorklist(Op.getNode()); + + DAG.DeleteNode(N); +} + +/// Return 1 if we can compute the negated form of the specified expression for +/// the same cost as the expression itself, or 2 if we can compute the negated +/// form more cheaply than the expression itself. +static char isNegatibleForFree(SDValue Op, bool LegalOperations, + const TargetLowering &TLI, + const TargetOptions *Options, + unsigned Depth = 0) { + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return 2; + + // Don't allow anything with multiple uses. + if (!Op.hasOneUse()) return 0; + + // Don't recurse exponentially. + if (Depth > 6) return 0; + + switch (Op.getOpcode()) { + default: return false; + case ISD::ConstantFP: + // Don't invert constant FP values after legalize. The negated constant + // isn't necessarily legal. + return LegalOperations ? 0 : 1; + case ISD::FADD: + // FIXME: determine better conditions for this xform. + if (!Options->UnsafeFPMath) return 0; + + // After operation legalization, it might not be legal to create new FSUBs. + if (LegalOperations && + !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) + return 0; + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) + return V; + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + if (!Options->UnsafeFPMath) return 0; + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return 1; + + case ISD::FMUL: + case ISD::FDIV: + if (Options->HonorSignDependentRoundingFPMath()) return 0; + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) + if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, Depth + 1)) + return V; + + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, + Depth + 1); + + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FSIN: + return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, + Depth + 1); + } +} + +/// If isNegatibleForFree returns true, return the newly negated expression. +static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, + bool LegalOperations, unsigned Depth = 0) { + const TargetOptions &Options = DAG.getTarget().Options; + // fneg is removable even if it has multiple uses. + if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); + + // Don't allow anything with multiple uses. + assert(Op.hasOneUse() && "Unknown reuse!"); + + assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); + + const SDNodeFlags *Flags = Op.getNode()->getFlags(); + + switch (Op.getOpcode()) { + default: llvm_unreachable("Unknown code"); + case ISD::ConstantFP: { + APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); + V.changeSign(); + return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); + } + case ISD::FADD: + // FIXME: determine better conditions for this xform. 
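+ // isNegatibleForFree only accepts an FADD under UnsafeFPMath, so that same
+ // condition must still hold when the negated form is built here.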
+ assert(Options.UnsafeFPMath); + + // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &Options, Depth+1)) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1), Flags); + // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), + Op.getOperand(0), Flags); + case ISD::FSUB: + // We can't turn -(A-B) into B-A when we honor signed zeros. + assert(Options.UnsafeFPMath); + + // fold (fneg (fsub 0, B)) -> B + if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) + if (N0CFP->isZero()) + return Op.getOperand(1); + + // fold (fneg (fsub A, B)) -> (fsub B, A) + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(0), Flags); + + case ISD::FMUL: + case ISD::FDIV: + assert(!Options.HonorSignDependentRoundingFPMath()); + + // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &Options, Depth+1)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1), Flags); + + // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), + GetNegatedExpression(Op.getOperand(1), DAG, + LegalOperations, Depth+1), Flags); + + case ISD::FP_EXTEND: + case ISD::FSIN: + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1)); + case ISD::FP_ROUND: + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), + GetNegatedExpression(Op.getOperand(0), DAG, + LegalOperations, Depth+1), + Op.getOperand(1)); + } +} + +// Return true if this node is a setcc, or is a select_cc +// that selects between the target values used for true and false, making it +// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to +// the appropriate nodes based on the type of node we are checking. This +// simplifies life a bit for the callers. +bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, + SDValue &CC) const { + if (N.getOpcode() == ISD::SETCC) { + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(2); + return true; + } + + if (N.getOpcode() != ISD::SELECT_CC || + !TLI.isConstTrueVal(N.getOperand(2).getNode()) || + !TLI.isConstFalseVal(N.getOperand(3).getNode())) + return false; + + if (TLI.getBooleanContents(N.getValueType()) == + TargetLowering::UndefinedBooleanContent) + return false; + + LHS = N.getOperand(0); + RHS = N.getOperand(1); + CC = N.getOperand(4); + return true; +} + +/// Return true if this is a SetCC-equivalent operation with only one use. +/// If this is true, it allows the users to invert the operation for free when +/// it is profitable to do so. +bool DAGCombiner::isOneUseSetCC(SDValue N) const { + SDValue N0, N1, N2; + if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) + return true; + return false; +} + +/// Returns true if N is a BUILD_VECTOR node whose +/// elements are all the same constant or undefined. 
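+/// For example, (build_vector 7, 7, undef, 7) is such a splat, while
+/// (build_vector 7, 3, 7, 3) is not.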
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { + BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); + if (!C) + return false; + + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + EVT EltVT = N->getValueType(0).getVectorElementType(); + return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs) && + EltVT.getSizeInBits() >= SplatBitSize); +} + +// \brief Returns the SDNode if it is a constant integer BuildVector +// or constant integer. +static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { + if (isa<ConstantSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant float BuildVector +// or constant float. +static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { + if (isa<ConstantFPSDNode>(N)) + return N.getNode(); + if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) + return N.getNode(); + return nullptr; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// int. +static ConstantSDNode *isConstOrConstSplat(SDValue N) { + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); + + // BuildVectors can truncate their operands. Ignore that case here. + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. + if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) + return CN; + } + + return nullptr; +} + +// \brief Returns the SDNode if it is a constant splat BuildVector or constant +// float. +static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { + if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) + return CN; + + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { + BitVector UndefElements; + ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); + + if (CN && UndefElements.none()) + return CN; + } + + return nullptr; +} + +SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, + SDValue N0, SDValue N1) { + EVT VT = N0.getValueType(); + if (N0.getOpcode() == Opc) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { + // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) + return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); + return SDValue(); + } + if (N0.hasOneUse()) { + // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); + } + } + } + + if (N1.getOpcode() == Opc) { + if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { + if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { + // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) + if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) + return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); + return SDValue(); + } + if (N1.hasOneUse()) { + // reassoc. 
(op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one + // use + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); + if (!OpNode.getNode()) + return SDValue(); + AddToWorklist(OpNode.getNode()); + return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); + } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, + bool AddTo) { + assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.1 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + To[0].getNode()->dump(&DAG); + dbgs() << " and " << NumTo-1 << " other values\n"); + for (unsigned i = 0, e = NumTo; i != e; ++i) + assert((!To[i].getNode() || + N->getValueType(i) == To[i].getValueType()) && + "Cannot combine value to value of different type!"); + + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesWith(N, To); + if (AddTo) { + // Push the new nodes and any users onto the worklist + for (unsigned i = 0, e = NumTo; i != e; ++i) { + if (To[i].getNode()) { + AddToWorklist(To[i].getNode()); + AddUsersToWorklist(To[i].getNode()); + } + } + } + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (N->use_empty()) + deleteAndRecombine(N); + return SDValue(N, 0); +} + +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { + // Replace all uses. If any nodes become isomorphic to other nodes and + // are deleted, make sure to remove them from our worklist. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); + + // Push the new node and any (possibly new) users onto the worklist. + AddToWorklist(TLO.New.getNode()); + AddUsersToWorklist(TLO.New.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. + if (TLO.Old.getNode()->use_empty()) + deleteAndRecombine(TLO.Old.getNode()); +} + +/// Check the specified integer node value to see if it can be simplified or if +/// things it uses can be simplified by bit propagation. If so, return true. +bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); + APInt KnownZero, KnownOne; + if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) + return false; + + // Revisit the node. + AddToWorklist(Op.getNode()); + + // Replace the old value with the new one. 
+ ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.2 "; + TLO.Old.getNode()->dump(&DAG); + dbgs() << "\nWith: "; + TLO.New.getNode()->dump(&DAG); + dbgs() << '\n'); + + CommitTargetLoweringOpt(TLO); + return true; +} + +void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { + SDLoc dl(Load); + EVT VT = Load->getValueType(0); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); + + DEBUG(dbgs() << "\nReplacing.9 "; + Load->dump(&DAG); + dbgs() << "\nWith: "; + Trunc.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); + DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); + deleteAndRecombine(Load); + AddToWorklist(Trunc.getNode()); +} + +SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { + Replace = false; + SDLoc dl(Op); + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) + : LD->getExtensionType(); + Replace = true; + return DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + MemVT, LD->getMemOperand()); + } + + unsigned Opc = Op.getOpcode(); + switch (Opc) { + default: break; + case ISD::AssertSext: + return DAG.getNode(ISD::AssertSext, dl, PVT, + SExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::AssertZext: + return DAG.getNode(ISD::AssertZext, dl, PVT, + ZExtPromoteOperand(Op.getOperand(0), PVT), + Op.getOperand(1)); + case ISD::Constant: { + unsigned ExtOpc = + Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOpc, dl, PVT, Op); + } + } + + if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) + return SDValue(); + return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); +} + +SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (!NewOp.getNode()) + return SDValue(); + AddToWorklist(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, + DAG.getValueType(OldVT)); +} + +SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + bool Replace = false; + SDValue NewOp = PromoteOperand(Op, PVT, Replace); + if (!NewOp.getNode()) + return SDValue(); + AddToWorklist(NewOp.getNode()); + + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); + return DAG.getZeroExtendInReg(NewOp, dl, OldVT); +} + +/// Promote the specified integer binary operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. 
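+ // The rewrite built below has the shape
+ // (i16 (op x, y)) -> (trunc (i32 (op (ext x), (ext y))))
+ // where the extension kind for each operand is chosen by PromoteOperand.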
+ unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace0 = false; + SDValue N0 = Op.getOperand(0); + SDValue NN0 = PromoteOperand(N0, PVT, Replace0); + if (!NN0.getNode()) + return SDValue(); + + bool Replace1 = false; + SDValue N1 = Op.getOperand(1); + SDValue NN1; + if (N0 == N1) + NN1 = NN0; + else { + NN1 = PromoteOperand(N1, PVT, Replace1); + if (!NN1.getNode()) + return SDValue(); + } + + AddToWorklist(NN0.getNode()); + if (NN1.getNode()) + AddToWorklist(NN1.getNode()); + + if (Replace0) + ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); + if (Replace1) + ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + SDLoc dl(Op); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, NN0, NN1)); + } + return SDValue(); +} + +/// Promote the specified integer shift operation if the target indicates it is +/// beneficial. e.g. On x86, it's usually better to promote i16 operations to +/// i32 since i16 instructions are longer. +SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + bool Replace = false; + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); + else + N0 = PromoteOperand(N0, PVT, Replace); + if (!N0.getNode()) + return SDValue(); + + AddToWorklist(N0.getNode()); + if (Replace) + ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); + + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + SDLoc dl(Op); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); + } + return SDValue(); +} + +SDValue DAGCombiner::PromoteExtend(SDValue Op) { + if (!LegalOperations) + return SDValue(); + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return SDValue(); + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. 
+ if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + DEBUG(dbgs() << "\nPromoting "; + Op.getNode()->dump(&DAG)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); + } + return SDValue(); +} + +bool DAGCombiner::PromoteLoad(SDValue Op) { + if (!LegalOperations) + return false; + + EVT VT = Op.getValueType(); + if (VT.isVector() || !VT.isInteger()) + return false; + + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return false; + + EVT PVT = VT; + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { + assert(PVT != VT && "Don't know what type to promote to!"); + + SDLoc dl(Op); + SDNode *N = Op.getNode(); + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT MemVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD + : ISD::EXTLOAD) + : LD->getExtensionType(); + SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + LD->getChain(), LD->getBasePtr(), + MemVT, LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); + + DEBUG(dbgs() << "\nPromoting "; + N->dump(&DAG); + dbgs() << "\nTo: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); + deleteAndRecombine(N); + AddToWorklist(Result.getNode()); + return true; + } + return false; +} + +/// \brief Recursively delete a node which has no uses and any operands for +/// which it is the only use. +/// +/// Note that this both deletes the nodes and removes them from the worklist. +/// It also adds any nodes who have had a user deleted to the worklist as they +/// may now have only one use and subject to other combines. +bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { + if (!N->use_empty()) + return false; + + SmallSetVector<SDNode *, 16> Nodes; + Nodes.insert(N); + do { + N = Nodes.pop_back_val(); + if (!N) + continue; + + if (N->use_empty()) { + for (const SDValue &ChildN : N->op_values()) + Nodes.insert(ChildN.getNode()); + + removeFromWorklist(N); + DAG.DeleteNode(N); + } else { + AddToWorklist(N); + } + } while (!Nodes.empty()); + return true; +} + +//===----------------------------------------------------------------------===// +// Main DAG Combiner implementation +//===----------------------------------------------------------------------===// + +void DAGCombiner::Run(CombineLevel AtLevel) { + // set the instance variables, so that the various visit routines may use it. + Level = AtLevel; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; + + // Add all the dag nodes to the worklist. + for (SDNode &Node : DAG.allnodes()) + AddToWorklist(&Node); + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + + // while the worklist isn't empty, find a node and + // try and combine it. 
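+ // Each iteration: delete dead nodes, re-legalize the node if we are running
+ // after the DAG was legalized, visit it, splice any replacement value into
+ // the graph, and queue the replacement and its users for another visit.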
+ while (!WorklistMap.empty()) { + SDNode *N; + // The Worklist holds the SDNodes in order, but it may contain null entries. + do { + N = Worklist.pop_back_val(); + } while (!N); + + bool GoodWorklistEntry = WorklistMap.erase(N); + (void)GoodWorklistEntry; + assert(GoodWorklistEntry && + "Found a worklist entry without a corresponding map entry!"); + + // If N has no uses, it is dead. Make sure to revisit all N's operands once + // N is deleted from the DAG, since they too may now be dead or may have a + // reduced number of uses, allowing other xforms. + if (recursivelyDeleteUnusedNodes(N)) + continue; + + WorklistRemover DeadNodes(*this); + + // If this combine is running after legalizing the DAG, re-legalize any + // nodes pulled off the worklist. + if (Level == AfterLegalizeDAG) { + SmallSetVector<SDNode *, 16> UpdatedNodes; + bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); + + for (SDNode *LN : UpdatedNodes) { + AddToWorklist(LN); + AddUsersToWorklist(LN); + } + if (!NIsValid) + continue; + } + + DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); + + // Add any operands of the new node which have not yet been combined to the + // worklist as well. Because the worklist uniques things already, this + // won't repeatedly process the same operand. + CombinedNodes.insert(N); + for (const SDValue &ChildN : N->op_values()) + if (!CombinedNodes.count(ChildN.getNode())) + AddToWorklist(ChildN.getNode()); + + SDValue RV = combine(N); + + if (!RV.getNode()) + continue; + + ++NodesCombined; + + // If we get back the same node we passed in, rather than a new node or + // zero, we know that the node must have defined multiple values and + // CombineTo was used. Since CombineTo takes care of the worklist + // mechanics for us, we have no work to do in this case. + if (RV.getNode() == N) + continue; + + assert(N->getOpcode() != ISD::DELETED_NODE && + RV.getNode()->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned new node!"); + + DEBUG(dbgs() << " ... into: "; + RV.getNode()->dump(&DAG)); + + // Transfer debug value. + DAG.TransferDbgValues(SDValue(N, 0), RV); + if (N->getNumValues() == RV.getNode()->getNumValues()) + DAG.ReplaceAllUsesWith(N, RV.getNode()); + else { + assert(N->getValueType(0) == RV.getValueType() && + N->getNumValues() == 1 && "Type mismatch"); + SDValue OpV = RV; + DAG.ReplaceAllUsesWith(N, &OpV); + } + + // Push the new node and any users onto the worklist + AddToWorklist(RV.getNode()); + AddUsersToWorklist(RV.getNode()); + + // Finally, if the node is now dead, remove it from the graph. The node + // may not be dead if the replacement process recursively simplified to + // something else needing this node. This will also take care of adding any + // operands which have lost a user to the worklist. + recursivelyDeleteUnusedNodes(N); + } + + // If the root changed (e.g. it was a dead load, update the root). 
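+ // The dummy handle node has tracked the root through any replacements, so
+ // its value is the up-to-date root even if the original root was combined
+ // away.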
+ DAG.setRoot(Dummy.getValue()); + DAG.RemoveDeadNodes(); +} + +SDValue DAGCombiner::visit(SDNode *N) { + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: return visitTokenFactor(N); + case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); + case ISD::ADD: return visitADD(N); + case ISD::SUB: return visitSUB(N); + case ISD::ADDC: return visitADDC(N); + case ISD::SUBC: return visitSUBC(N); + case ISD::ADDE: return visitADDE(N); + case ISD::SUBE: return visitSUBE(N); + case ISD::MUL: return visitMUL(N); + case ISD::SDIV: return visitSDIV(N); + case ISD::UDIV: return visitUDIV(N); + case ISD::SREM: + case ISD::UREM: return visitREM(N); + case ISD::MULHU: return visitMULHU(N); + case ISD::MULHS: return visitMULHS(N); + case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); + case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); + case ISD::SMULO: return visitSMULO(N); + case ISD::UMULO: return visitUMULO(N); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: return visitIMINMAX(N); + case ISD::AND: return visitAND(N); + case ISD::OR: return visitOR(N); + case ISD::XOR: return visitXOR(N); + case ISD::SHL: return visitSHL(N); + case ISD::SRA: return visitSRA(N); + case ISD::SRL: return visitSRL(N); + case ISD::ROTR: + case ISD::ROTL: return visitRotate(N); + case ISD::BSWAP: return visitBSWAP(N); + case ISD::CTLZ: return visitCTLZ(N); + case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); + case ISD::CTTZ: return visitCTTZ(N); + case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); + case ISD::CTPOP: return visitCTPOP(N); + case ISD::SELECT: return visitSELECT(N); + case ISD::VSELECT: return visitVSELECT(N); + case ISD::SELECT_CC: return visitSELECT_CC(N); + case ISD::SETCC: return visitSETCC(N); + case ISD::SETCCE: return visitSETCCE(N); + case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); + case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); + case ISD::ANY_EXTEND: return visitANY_EXTEND(N); + case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); + case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); + case ISD::TRUNCATE: return visitTRUNCATE(N); + case ISD::BITCAST: return visitBITCAST(N); + case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); + case ISD::FADD: return visitFADD(N); + case ISD::FSUB: return visitFSUB(N); + case ISD::FMUL: return visitFMUL(N); + case ISD::FMA: return visitFMA(N); + case ISD::FDIV: return visitFDIV(N); + case ISD::FREM: return visitFREM(N); + case ISD::FSQRT: return visitFSQRT(N); + case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); + case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); + case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); + case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); + case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); + case ISD::FP_ROUND: return visitFP_ROUND(N); + case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); + case ISD::FP_EXTEND: return visitFP_EXTEND(N); + case ISD::FNEG: return visitFNEG(N); + case ISD::FABS: return visitFABS(N); + case ISD::FFLOOR: return visitFFLOOR(N); + case ISD::FMINNUM: return visitFMINNUM(N); + case ISD::FMAXNUM: return visitFMAXNUM(N); + case ISD::FCEIL: return visitFCEIL(N); + case ISD::FTRUNC: return visitFTRUNC(N); + case ISD::BRCOND: return visitBRCOND(N); + case ISD::BR_CC: return visitBR_CC(N); + case ISD::LOAD: return visitLOAD(N); + case ISD::STORE: return visitSTORE(N); + case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); + case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); + case ISD::BUILD_VECTOR: return 
visitBUILD_VECTOR(N); + case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); + case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); + case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); + case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); + case ISD::MGATHER: return visitMGATHER(N); + case ISD::MLOAD: return visitMLOAD(N); + case ISD::MSCATTER: return visitMSCATTER(N); + case ISD::MSTORE: return visitMSTORE(N); + case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); + case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); + } + return SDValue(); +} + +SDValue DAGCombiner::combine(SDNode *N) { + SDValue RV = visit(N); + + // If nothing happened, try a target-specific DAG combine. + if (!RV.getNode()) { + assert(N->getOpcode() != ISD::DELETED_NODE && + "Node was deleted but visit returned NULL!"); + + if (N->getOpcode() >= ISD::BUILTIN_OP_END || + TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { + + // Expose the DAG combiner to the target combiner impls. + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level, false, this); + + RV = TLI.PerformDAGCombine(N, DagCombineInfo); + } + } + + // If nothing happened still, try promoting the operation. + if (!RV.getNode()) { + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + RV = PromoteIntBinOp(SDValue(N, 0)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + RV = PromoteIntShiftOp(SDValue(N, 0)); + break; + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + RV = PromoteExtend(SDValue(N, 0)); + break; + case ISD::LOAD: + if (PromoteLoad(SDValue(N, 0))) + RV = SDValue(N, 0); + break; + } + } + + // If N is a commutative binary node, try commuting it to enable more + // sdisel CSE. + if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && + N->getNumValues() == 1) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // Constant operands are canonicalized to RHS. + if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { + SDValue Ops[] = {N1, N0}; + SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, + N->getFlags()); + if (CSENode) + return SDValue(CSENode, 0); + } + } + + return RV; +} + +/// Given a node, return its input chain if it has one, otherwise return a null +/// sd operand. +static SDValue getInputChainForNode(SDNode *N) { + if (unsigned NumOps = N->getNumOperands()) { + if (N->getOperand(0).getValueType() == MVT::Other) + return N->getOperand(0); + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + return N->getOperand(NumOps-1); + for (unsigned i = 1; i < NumOps-1; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) + return N->getOperand(i); + } + return SDValue(); +} + +SDValue DAGCombiner::visitTokenFactor(SDNode *N) { + // If N has two operands, where one has an input chain equal to the other, + // the 'other' chain is redundant. + if (N->getNumOperands() == 2) { + if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) + return N->getOperand(0); + if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) + return N->getOperand(1); + } + + SmallVector<SDNode *, 8> TFs; // List of token factors to visit. + SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. + SmallPtrSet<SDNode*, 16> SeenOps; + bool Changed = false; // If we should replace this token factor. 
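+ // Flatten the token factor, e.g. (TokenFactor (TokenFactor a, b), c) with a
+ // single-use inner node becomes (TokenFactor a, b, c); EntryToken operands
+ // and duplicate operands are simply dropped.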
+ + // Start out with this token factor. + TFs.push_back(N); + + // Iterate through token factors. The TFs grows when new token factors are + // encountered. + for (unsigned i = 0; i < TFs.size(); ++i) { + SDNode *TF = TFs[i]; + + // Check each of the operands. + for (const SDValue &Op : TF->op_values()) { + + switch (Op.getOpcode()) { + case ISD::EntryToken: + // Entry tokens don't need to be added to the list. They are + // redundant. + Changed = true; + break; + + case ISD::TokenFactor: + if (Op.hasOneUse() && + std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { + // Queue up for processing. + TFs.push_back(Op.getNode()); + // Clean up in case the token factor is removed. + AddToWorklist(Op.getNode()); + Changed = true; + break; + } + // Fall thru + + default: + // Only add if it isn't already in the list. + if (SeenOps.insert(Op.getNode()).second) + Ops.push_back(Op); + else + Changed = true; + break; + } + } + } + + SDValue Result; + + // If we've changed things around then replace token factor. + if (Changed) { + if (Ops.empty()) { + // The entry token is the only possible outcome. + Result = DAG.getEntryNode(); + } else { + // New and improved token factor. + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); + } + + // Add users to worklist if AA is enabled, since it may introduce + // a lot of new chained token factors while removing memory deps. + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + return CombineTo(N, Result, UseAA /*add to worklist*/); + } + + return Result; +} + +/// MERGE_VALUES can always be eliminated. +SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { + WorklistRemover DeadNodes(*this); + // Replacing results may cause a different MERGE_VALUES to suddenly + // be CSE'd with N, and carry its uses with it. Iterate until no + // uses remain, to ensure that the node can be safely deleted. + // First add the users of this node to the work list so that they + // can be tried again once they have new operands. + AddUsersToWorklist(N); + do { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); + } while (!N->use_empty()); + deleteAndRecombine(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +/// If \p N is a ContantSDNode with isOpaque() == false return it casted to a +/// ContantSDNode pointer else nullptr. +static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); + return Const != nullptr && !Const->isOpaque() ? 
Const : nullptr; +} + +SDValue DAGCombiner::visitADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (add x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + } + + // fold (add x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); + // fold (add x, 0) -> x + if (isNullConstant(N1)) + return N0; + // fold (add Sym, c) -> Sym+c + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && + GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, + GA->getOffset() + + (uint64_t)N1C->getSExtValue()); + // fold ((c1-A)+c2) -> (c1+c2)-A + if (N1C && N0.getOpcode() == ISD::SUB) + if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(N1C->getAPIntValue()+ + N0C->getAPIntValue(), DL, VT), + N0.getOperand(1)); + } + // reassociate add + if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) + return RADD; + // fold ((0-A) + B) -> B-A + if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); + // fold (A + (0-B)) -> A-B + if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); + // fold (A+(B-A)) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) + return N1.getOperand(0); + // fold ((B-A)+A) -> B + if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) + return N0.getOperand(0); + // fold (A+(B-(A+C))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(0)) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(1)); + // fold (A+(B-(C+A))) to (B-C) + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && + N0 == N1.getOperand(1).getOperand(1)) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), + N1.getOperand(1).getOperand(0)); + // fold (A+((B-A)+or-C)) to (B+or-C) + if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && + N1.getOperand(0).getOpcode() == ISD::SUB && + N0 == N1.getOperand(0).getOperand(1)) + return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, + N1.getOperand(0).getOperand(0), N1.getOperand(1)); + + // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant + if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + + if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 
+ DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); + } + + if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (a+b) -> (a|b) iff a and b share no bits. + if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && + VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); + + // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) + if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N1.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1.getOperand(0).getOperand(1), + N1.getOperand(1))); + if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && + isNullConstant(N0.getOperand(0).getOperand(0))) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, + N0.getOperand(0).getOperand(1), + N0.getOperand(1))); + + if (N1.getOpcode() == ISD::AND) { + SDValue AndOp0 = N1.getOperand(0); + unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + + // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) + // and similar xforms where the inner op is either ~0 or 0. + if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); + } + } + + // add (sext i1), X -> sub X, (zext i1) + if (N0.getOpcode() == ISD::SIGN_EXTEND && + N0.getOperand(0).getValueType() == MVT::i1 && + !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); + return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); + } + + // add X, (sextinreg Y i1) -> sub X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, DL, VT)); + return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // If the flag result is dead, turn this into an ADD. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + SDLoc(N), MVT::Glue)); + + // canonicalize constant to RHS. + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); + + // fold (addc x, 0) -> x + no carry out + if (isNullConstant(N1)) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, + SDLoc(N), MVT::Glue)); + + // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. + APInt LHSZero, LHSOne; + APInt RHSZero, RHSOne; + DAG.computeKnownBits(N0, LHSZero, LHSOne); + + if (LHSZero.getBoolValue()) { + DAG.computeKnownBits(N1, RHSZero, RHSOne); + + // If all possibly-set bits on the LHS are clear on the RHS, return an OR. + // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
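// Illustrative aside (not from the imported LLVM sources): the add->or
// rewrites in visitADD and visitADDC above rest on a simple identity -- when
// the two operands have no set bits in common, the addition can never carry,
// so it is bit-for-bit the same as a bitwise OR. Two compile-time spot checks
// of that identity in plain C++, independent of the SelectionDAG APIs:
static_assert((0xF0u & 0x0Fu) == 0 && (0xF0u + 0x0Fu) == (0xF0u | 0x0Fu),
              "disjoint-bit add is the same as or");
static_assert((0xA5A5u + 0x5A5Au) == (0xA5A5u | 0x5A5Au),
              "disjoint-bit add is the same as or");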
+ if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) + return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, + SDLoc(N), MVT::Glue)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitADDE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // canonicalize constant to RHS + ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && !N1C) + return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), + N1, N0, CarryIn); + + // fold (adde x, y, false) -> (addc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + +// Since it may not be valid to emit a fold to zero for vector initializers +// check if we can before folding. +static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) + return DAG.getConstant(0, DL, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return DAG.getConstant(0, DL, VT); + return SDValue(); +} + +SDValue DAGCombiner::visitSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (sub x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (sub x, x) -> 0 + // FIXME: Refactor this and xor and other similar operations together. + if (N0 == N1) + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + // fold (sub c1, c2) -> c1-c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); + // fold (sub x, c) -> (add x, -c) + if (N1C) { + SDLoc DL(N); + return DAG.getNode(ISD::ADD, DL, VT, N0, + DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); + } + // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + if (isAllOnesConstant(N0)) + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + // fold A-(A-B) -> B + if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) + return N1.getOperand(1); + // fold (A+B)-A -> B + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) + return N0.getOperand(1); + // fold (A+B)-B -> A + if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) + return N0.getOperand(0); + // fold C2-(A+C1) -> (C2-C1)-A + ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
nullptr : + dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); + if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { + SDLoc DL(N); + SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), + DL, VT); + return DAG.getNode(ISD::SUB, DL, VT, NewC, + N1.getOperand(0)); + } + // fold ((A+(B+or-C))-B) -> A+or-C + if (N0.getOpcode() == ISD::ADD && + (N0.getOperand(1).getOpcode() == ISD::SUB || + N0.getOperand(1).getOpcode() == ISD::ADD) && + N0.getOperand(1).getOperand(0) == N1) + return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(1)); + // fold ((A+(C+B))-B) -> A+C + if (N0.getOpcode() == ISD::ADD && + N0.getOperand(1).getOpcode() == ISD::ADD && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + // fold ((A-(B-C))-C) -> A-B + if (N0.getOpcode() == ISD::SUB && + N0.getOperand(1).getOpcode() == ISD::SUB && + N0.getOperand(1).getOperand(1) == N1) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1).getOperand(0)); + + // If either operand of a sub is undef, the result is undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + // If the relocation model supports it, consider symbol offsets. + if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) + if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { + // fold (sub Sym, c) -> Sym-c + if (N1C && GA->getOpcode() == ISD::GlobalAddress) + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, + GA->getOffset() - + (uint64_t)N1C->getSExtValue()); + // fold (sub Sym+c1, Sym+c2) -> c1-c2 + if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) + if (GA->getGlobal() == GB->getGlobal()) + return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), + SDLoc(N), VT); + } + + // sub X, (sextinreg Y i1) -> add X, (and Y 1) + if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { + VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); + if (TN->getVT() == MVT::i1) { + SDLoc DL(N); + SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), + DAG.getConstant(1, DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + SDLoc DL(N); + + // If the flag result is dead, turn this into an SUB. + if (!N->hasAnyUseOfValue(1)) + return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + + // fold (subc x, x) -> 0 + no borrow + if (N0 == N1) + return CombineTo(N, DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + + // fold (subc x, 0) -> x + no borrow + if (isNullConstant(N1)) + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + + // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow + if (isAllOnesConstant(N0)) + return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), + DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSUBE(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue CarryIn = N->getOperand(2); + + // fold (sube x, y, false) -> (subc x, y) + if (CarryIn.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); + + return SDValue(); +} + +SDValue DAGCombiner::visitMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold (mul x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(N), VT); + + bool N0IsConst = false; + bool N1IsConst = false; + bool N1IsOpaqueConst = false; + bool N0IsOpaqueConst = false; + APInt ConstValue0, ConstValue1; + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); + N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); + } else { + N0IsConst = isa<ConstantSDNode>(N0); + if (N0IsConst) { + ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); + N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); + } + N1IsConst = isa<ConstantSDNode>(N1); + if (N1IsConst) { + ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); + N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); + } + } + + // fold (mul c1, c2) -> c1*c2 + if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) + return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, + N0.getNode(), N1.getNode()); + + // canonicalize constant to RHS (vector doesn't have to splat) + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); + // fold (mul x, 0) -> 0 + if (N1IsConst && ConstValue1 == 0) + return N1; + // We require a splat of the entire scalar bit width for non-contiguous + // bit patterns. + bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + return N0; + // fold (mul x, -1) -> 0-x + if (N1IsConst && ConstValue1.isAllOnesValue()) { + SDLoc DL(N); + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + } + // fold (mul x, (1 << c)) -> x << c + if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && + IsFullSplat) { + SDLoc DL(N); + return DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(ConstValue1.logBase2(), DL, + getShiftAmountTy(N0.getValueType()))); + } + // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c + if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && + IsFullSplat) { + unsigned Log2Val = (-ConstValue1).logBase2(); + SDLoc DL(N); + // FIXME: If the input is something that is easily negated (e.g. a + // single-use add), we should put the negate there. 
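// Illustrative aside (not from the imported LLVM sources): the two
// power-of-two multiply folds above, together with the SUB built just below,
// encode the usual strength reduction -- x * 2^c is x << c, and x * -(2^c) is
// 0 - (x << c). Compile-time spot checks of those identities in plain C++:
static_assert(7 * 8 == (7 << 3), "mul by 2^c is a left shift");
static_assert(7 * -8 == 0 - (7 << 3), "mul by -(2^c) is a negated left shift");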
+ return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), + DAG.getNode(ISD::SHL, DL, VT, N0, + DAG.getConstant(Log2Val, DL, + getShiftAmountTy(N0.getValueType())))); + } + + APInt Val; + // (mul (shl X, c1), c2) -> (mul X, c2 << c1) + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, + N1, N0.getOperand(1)); + AddToWorklist(C3.getNode()); + return DAG.getNode(ISD::MUL, SDLoc(N), VT, + N0.getOperand(0), C3); + } + + // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one + // use. + { + SDValue Sh(nullptr,0), Y(nullptr,0); + // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa<ConstantSDNode>(N0.getOperand(1))) && + N0.getNode()->hasOneUse()) { + Sh = N0; Y = N1; + } else if (N1.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(N1.getOperand(1)) && + N1.getNode()->hasOneUse()) { + Sh = N1; Y = N0; + } + + if (Sh.getNode()) { + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, + Sh.getOperand(0), Y); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, + Mul, Sh.getOperand(1)); + } + } + + // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) + if (isConstantIntBuildVectorOrConstantInt(N1) && + N0.getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && + isMulAddWithConstProfitable(N, N0, N1)) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, + N0.getOperand(0), N1), + DAG.getNode(ISD::MUL, SDLoc(N1), VT, + N0.getOperand(1), N1)); + + // reassociate mul + if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) + return RMUL; + + return SDValue(); +} + +/// Return true if divmod libcall is available. +static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: return false; // No libcall for vector types. + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != nullptr; +} + +/// Issue divrem if both quotient and remainder are needed. +SDValue DAGCombiner::useDivRem(SDNode *Node) { + if (Node->use_empty()) + return SDValue(); // This is a dead node, leave it alone. + + EVT VT = Node->getValueType(0); + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + unsigned Opcode = Node->getOpcode(); + bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); + + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + // If DIVREM is going to get expanded into a libcall, + // but there is no libcall available, then don't combine. + if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && + !isDivRemLibcallAvailable(Node, isSigned, TLI)) + return SDValue(); + + // If div is legal, it's better to do the normal expansion + unsigned OtherOpcode = 0; + if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { + OtherOpcode = isSigned ? 
ISD::SREM : ISD::UREM; + if (TLI.isOperationLegalOrCustom(Opcode, VT)) + return SDValue(); + } else { + OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; + if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) + return SDValue(); + } + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + SDValue combined; + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node || User->use_empty()) + continue; + // Convert the other matching node(s), too; + // otherwise, the DIVREM may get target-legalized into something + // target-specific that we won't be able to recognize. + unsigned UserOpc = User->getOpcode(); + if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) { + if (!combined) { + if (UserOpc == OtherOpcode) { + SDVTList VTs = DAG.getVTList(VT, VT); + combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); + } else if (UserOpc == DivRemOpc) { + combined = SDValue(User, 0); + } else { + assert(UserOpc == Opcode); + continue; + } + } + if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) + CombineTo(User, combined); + else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) + CombineTo(User, combined.getValue(1)); + } + } + return combined; +} + +SDValue DAGCombiner::visitSDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + SDLoc DL(N); + + // fold (sdiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); + // fold (sdiv X, 1) -> X + if (N1C && N1C->isOne()) + return N0; + // fold (sdiv X, -1) -> 0-X + if (N1C && N1C->isAllOnesValue()) + return DAG.getNode(ISD::SUB, DL, VT, + DAG.getConstant(0, DL, VT), N0); + + // If we know the sign bits of both operands are zero, strength reduce to a + // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); + } + + // fold (sdiv X, pow2) -> simple ops after legalize + // FIXME: We check for the exact bit here because the generic lowering gives + // better results in that case. The target-specific lowering should learn how + // to handle exact sdivs efficiently. + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { + // Target-specific implementation of sdiv x, pow2. + if (SDValue Res = BuildSDIVPow2(N)) + return Res; + + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); + + // Splat the sign bit into the register + SDValue SGN = + DAG.getNode(ISD::SRA, DL, VT, N0, + DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, + getShiftAmountTy(N0.getValueType()))); + AddToWorklist(SGN.getNode()); + + // Add (N0 < 0) ? 
abs2 - 1 : 0; + SDValue SRL = + DAG.getNode(ISD::SRL, DL, VT, SGN, + DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, + getShiftAmountTy(SGN.getValueType()))); + SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); + AddToWorklist(SRL.getNode()); + AddToWorklist(ADD.getNode()); // Divide by pow2 + SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, + DAG.getConstant(lg2, DL, + getShiftAmountTy(ADD.getValueType()))); + + // If we're dividing by a positive value, we're done. Otherwise, we must + // negate the result. + if (N1C->getAPIntValue().isNonNegative()) + return SRA; + + AddToWorklist(SRA.getNode()); + return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); + } + + // If integer divide is expensive and we satisfy the requirements, emit an + // alternate sequence. Targets may check function attributes for size/speed + // trade-offs. + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildSDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). + if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, DL, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitUDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + SDLoc DL(N); + + // fold (udiv c1, c2) -> c1/c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, + N0C, N1C)) + return Folded; + // fold (udiv x, (1 << c)) -> x >>u c + if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) + return DAG.getNode(ISD::SRL, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, + getShiftAmountTy(N0.getValueType()))); + + // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + EVT ADDVT = N1.getOperand(1).getValueType(); + SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, + N1.getOperand(1), + DAG.getConstant(SHC->getAPIntValue() + .logBase2(), + DL, ADDVT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::SRL, DL, VT, N0, Add); + } + } + } + + // fold (udiv x, c) -> alternate + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue Op = BuildUDIV(N)) + return Op; + + // sdiv, srem -> sdivrem + // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. + // Otherwise, we break the simplification logic in visitREM(). 
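// Illustrative aside (not from the imported LLVM sources): the unsigned
// power-of-two folds nearby reduce division and remainder to bit operations:
// x /u 2^k is a logical right shift by k, and x %u 2^k (handled in visitREM
// below) keeps only the low k bits. Compile-time spot checks in plain C++:
static_assert(37u / 8u == (37u >> 3), "udiv by 2^k is a logical shift right");
static_assert(37u % 8u == (37u & 7u), "urem by 2^k masks the low k bits");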
+ if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) + if (SDValue DivRem = useDivRem(N)) + return DivRem; + + // undef / X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, DL, VT); + // X / undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +// handles ISD::SREM and ISD::UREM +SDValue DAGCombiner::visitREM(SDNode *N) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + bool isSigned = (Opcode == ISD::SREM); + SDLoc DL(N); + + // fold (rem c1, c2) -> c1%c2 + ConstantSDNode *N0C = isConstOrConstSplat(N0); + ConstantSDNode *N1C = isConstOrConstSplat(N1); + if (N0C && N1C) + if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) + return Folded; + + if (isSigned) { + // If we know the sign bits of both operands are zero, strength reduce to a + // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 + if (!VT.isVector()) { + if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UREM, DL, VT, N0, N1); + } + } else { + // fold (urem x, pow2) -> (and x, pow2-1) + if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && + N1C->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::AND, DL, VT, N0, + DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); + } + // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) + if (N1.getOpcode() == ISD::SHL) { + if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { + if (SHC->getAPIntValue().isPowerOf2()) { + SDValue Add = + DAG.getNode(ISD::ADD, DL, VT, N1, + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, + VT)); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::AND, DL, VT, N0, Add); + } + } + } + } + + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + + // If X/C can be simplified by the division-by-constant logic, lower + // X%C to the equivalent of X-X/C*C. + // To avoid mangling nodes, this simplification requires that the combine() + // call for the speculative DIV must not cause a DIVREM conversion. We guard + // against this by skipping the simplification if isIntDivCheap(). When + // div is not cheap, combine will not return a DIVREM. Regardless, + // checking cheapness here makes sense since the simplification results in + // fatter code. + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { + unsigned DivOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; + SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); + AddToWorklist(Div.getNode()); + SDValue OptimizedDiv = combine(Div.getNode()); + if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { + assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && + (OptimizedDiv.getOpcode() != ISD::SDIVREM)); + SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); + SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); + AddToWorklist(Mul.getNode()); + return Sub; + } + } + + // sdiv, srem -> sdivrem + if (SDValue DivRem = useDivRem(N)) + return DivRem.getValue(1); + + // undef % X -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, DL, VT); + // X % undef -> undef + if (N1.getOpcode() == ISD::UNDEF) + return N1; + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHS(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // fold (mulhs x, 0) -> 0 + if (isNullConstant(N1)) + return N1; + // fold (mulhs x, 1) -> (sra x, size(x)-1) + if (isOneConstant(N1)) { + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, + DAG.getConstant(N0.getValueType().getSizeInBits() - 1, + DL, + getShiftAmountTy(N0.getValueType()))); + } + // fold (mulhs x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(N), VT); + + // If the type twice as wide is legal, transform the mulhs to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitMULHU(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // fold (mulhu x, 0) -> 0 + if (isNullConstant(N1)) + return N1; + // fold (mulhu x, 1) -> 0 + if (isOneConstant(N1)) + return DAG.getConstant(0, DL, N0.getValueType()); + // fold (mulhu x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, DL, VT); + + // If the type twice as wide is legal, transform the mulhu to a wider multiply + // plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); + N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); + N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); + N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(N1.getValueType()))); + return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); + } + } + + return SDValue(); +} + +/// Perform optimizations common to nodes that compute two values. LoOp and HiOp +/// give the opcodes for the two computations that are being performed. Return +/// true if a simplification was made. 
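// Illustrative aside (not from the imported LLVM sources): the MULHS/MULHU
// widening transform just above relies on the high half of an NxN->N multiply
// being the full 2N-bit product shifted right by N. A compile-time spot check
// for the unsigned i32 case in plain C++:
static_assert((0xFFFFFFFFULL * 0xFFFFFFFFULL) >> 32 == 0xFFFFFFFEULL,
              "mulhu(~0u, ~0u) is the widened product shifted right by 32");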
+SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, + unsigned HiOp) { + // If the high half is not needed, just compute the low half. + bool HiExists = N->hasAnyUseOfValue(1); + if (!HiExists && + (!LegalOperations || + TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); + return CombineTo(N, Res, Res); + } + + // If the low half is not needed, just compute the high half. + bool LoExists = N->hasAnyUseOfValue(0); + if (!LoExists && + (!LegalOperations || + TLI.isOperationLegal(HiOp, N->getValueType(1)))) { + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); + return CombineTo(N, Res, Res); + } + + // If both halves are used, return as it is. + if (LoExists && HiExists) + return SDValue(); + + // If the two computed results can be simplified separately, separate them. + if (LoExists) { + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); + AddToWorklist(Lo.getNode()); + SDValue LoOpt = combine(Lo.getNode()); + if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && + (!LegalOperations || + TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) + return CombineTo(N, LoOpt, LoOpt); + } + + if (HiExists) { + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); + AddToWorklist(Hi.getNode()); + SDValue HiOpt = combine(Hi.getNode()); + if (HiOpt.getNode() && HiOpt != Hi && + (!LegalOperations || + TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) + return CombineTo(N, HiOpt, HiOpt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) + return Res; + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. + Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { + if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) + return Res; + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // If the type is twice as wide is legal, transform the mulhu to a wider + // multiply plus a shift. + if (VT.isSimple() && !VT.isVector()) { + MVT Simple = VT.getSimpleVT(); + unsigned SimpleSize = Simple.getSizeInBits(); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); + if (TLI.isOperationLegal(ISD::MUL, NewVT)) { + SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); + SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); + Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); + // Compute the high part as N1. 
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, + DAG.getConstant(SimpleSize, DL, + getShiftAmountTy(Lo.getValueType()))); + Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); + // Compute the low part as N0. + Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); + return CombineTo(N, Lo, Hi); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSMULO(SDNode *N) { + // (smulo x, 2) -> (saddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitUMULO(SDNode *N) { + // (umulo x, 2) -> (uaddo x, x) + if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) + if (C2->getAPIntValue() == 2) + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), + N->getOperand(0), N->getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitIMINMAX(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (add c1, c2) -> c1+c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); + + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); + + return SDValue(); +} + +/// If this is a binary operator with two operands of the same opcode, try to +/// simplify it. +SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { + SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); + + // Bail early if none of these transforms apply. + if (N0.getNode()->getNumOperands() == 0) return SDValue(); + + // For each of OP in AND/OR/XOR: + // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) + // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) + // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) + // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // + // do not sink logical op inside of a vector extend, since it may combine + // into a vsetcc. + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::BSWAP || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && + !VT.isVector() && + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorklist(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); + } + + // For each of OP in SHL/SRL/SRA/AND... 
+ // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) + // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) + // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || + N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && + N0.getOperand(1) == N1.getOperand(1)) { + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), + N0.getOperand(0).getValueType(), + N0.getOperand(0), N1.getOperand(0)); + AddToWorklist(ORNode.getNode()); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, + ORNode, N0.getOperand(1)); + } + + // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) + // Only perform this optimization after type legalization and before + // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by + // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and + // we don't want to undo this promotion. + // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper + // on scalars. + if ((N0.getOpcode() == ISD::BITCAST || + N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && + Level == AfterLegalizeTypes) { + SDValue In0 = N0.getOperand(0); + SDValue In1 = N1.getOperand(0); + EVT In0Ty = In0.getValueType(); + EVT In1Ty = In1.getValueType(); + SDLoc DL(N); + // If both incoming values are integers, and the original types are the + // same. + if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { + SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); + SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); + AddToWorklist(Op.getNode()); + return BC; + } + } + + // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). + // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) + // If both shuffles use the same mask, and both shuffle within a single + // vector, then it is worthwhile to move the swizzle after the operation. + // The type-legalizer generates this pattern when loading illegal + // vector types from memory. In many cases this allows additional shuffle + // optimizations. + // There are other cases where moving the shuffle after the xor/and/or + // is profitable even if shuffles don't perform a swizzle. + // If both shuffles use the same mask, and both shuffles have the same first + // or second operand, then it might still be profitable to move the shuffle + // after the xor/and/or operation. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); + + assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && + "Inputs to shuffles are not the same type"); + + // Check that both shuffles use the same mask. The masks are known to be of + // the same length because the result vector type is the same. + // Check also that shuffles have only one use to avoid introducing extra + // instructions. + if (SVN0->hasOneUse() && SVN1->hasOneUse() && + SVN0->getMask().equals(SVN1->getMask())) { + SDValue ShOp = N0->getOperand(1); + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. 
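// Illustrative aside (not from the imported LLVM sources): the hand-hoisting
// folds earlier in SimplifyBinOpWithSameOpcodeHands rely on bitwise logic
// distributing over a common shift amount, e.g. (x << z) & (y << z) is
// (x & y) << z. A compile-time spot check in plain C++:
static_assert(((0xF0u << 4) & (0xACu << 4)) == ((0xF0u & 0xACu) << 4),
              "and distributes over a common left shift");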
+ if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, SDLoc(N), VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) + // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) + // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) + if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(0), N1->getOperand(0)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, + &SVN0->getMask()[0]); + } + + // Don't try to fold this node if it requires introducing a + // build vector of all zeros that might be illegal at this stage. + ShOp = N0->getOperand(0); + if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { + if (!LegalTypes) + ShOp = DAG.getConstant(0, SDLoc(N), VT); + else + ShOp = SDValue(); + } + + // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) + // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) + // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) + if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { + SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + N0->getOperand(1), N1->getOperand(1)); + AddToWorklist(NewNode.getNode()); + return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, + &SVN0->getMask()[0]); + } + } + } + + return SDValue(); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an And operation to a single value. This makes them reusable in the context +/// of visitSELECT(). Rules involving constants are not included as +/// visitSELECT() already handles those cases. +SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, + SDNode *LocReference) { + EVT VT = N1.getValueType(); + + // fold (and x, undef) -> 0 + if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(LocReference), VT); + // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && + LL.getValueType().isInteger()) { + // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) + if (isNullConstant(LR) && Op1 == ISD::SETEQ) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + if (isAllOnesConstant(LR)) { + // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) + if (Op1 == ISD::SETEQ) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) + if (Op1 == ISD::SETGT) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + } + } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((isNullConstant(LR) && 
isAllOnesConstant(RR)) || + (isAllOnesConstant(LR) && isNullConstant(RR)))) { + SDLoc DL(N0); + SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), + LL, DAG.getConstant(1, DL, + LL.getValueType())); + AddToWorklist(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, + DAG.getConstant(2, DL, LL.getValueType()), + ISD::SETUGE); + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + } + + if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && + VT.getSizeInBits() <= 64) { + if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + APInt ADDC = ADDI->getAPIntValue(); + if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal + // immediate for an add, but it is legal if its top c2 bits are set, + // transform the ADD so the immediate doesn't need to be materialized + // in a register. + if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { + APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), + SRLI->getZExtValue()); + if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { + ADDC |= Mask; + if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { + SDLoc DL(N0); + SDValue NewAdd = + DAG.getNode(ISD::ADD, DL, VT, + N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); + CombineTo(N0.getNode(), NewAdd); + // Return N so it doesn't get rechecked! + return SDValue(LocReference, 0); + } + } + } + } + } + } + + return SDValue(); +} + +bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, + EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, + bool &NarrowLoad) { + uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); + + if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) + return false; + + ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + LoadedVT = LoadN->getMemoryVT(); + + if (ExtVT == LoadedVT && + (!LegalOperations || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { + // ZEXTLOAD will match without needing to change the size of the value being + // loaded. + NarrowLoad = false; + return true; + } + + // Do not change the width of a volatile load. + if (LoadN->isVolatile()) + return false; + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). 
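// Illustrative aside (not from the imported LLVM sources): two of the setcc
// folds in visitANDLike above are worth spelling out. (X == 0 && Y == 0) is
// equivalent to ((X | Y) == 0), and (X != 0 && X != -1) is equivalent to the
// unsigned comparison (X + 1) >=u 2, since adding one maps exactly {-1, 0}
// onto {0, 1}. Compile-time spot checks in plain C++ (~0u standing in for -1):
static_assert(((6u | 0u) == 0u) == (6u == 0u && 0u == 0u),
              "a pair of seteq-0 tests folds to a test of the or");
static_assert((5u + 1u >= 2u) && ((~0u + 1u >= 2u) == false),
              "x != 0 && x != -1 becomes (x + 1) >=u 2");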
+ if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) + return false; + + if (LegalOperations && + !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) + return false; + + if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) + return false; + + NarrowLoad = true; + return true; +} + +SDValue DAGCombiner::visitAND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (and x, 0) -> 0, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getNullValue( + N0.getValueType().getScalarType().getSizeInBits()), + SDLoc(N), N0.getValueType()); + if (ISD::isBuildVectorAllZeros(N1.getNode())) + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getNullValue( + N1.getValueType().getScalarType().getSizeInBits()), + SDLoc(N), N1.getValueType()); + + // fold (and x, -1) -> x, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N0; + } + + // fold (and c1, c2) -> c1&c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); + // fold (and x, -1) -> x + if (isAllOnesConstant(N1)) + return N0; + // if (and x, c) is known to be zero, return 0 + unsigned BitWidth = VT.getScalarType().getSizeInBits(); + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(BitWidth))) + return DAG.getConstant(0, SDLoc(N), VT); + // reassociate and + if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) + return RAND; + // fold (and (or x, C), D) -> D if (C & D) == D + if (N1C && N0.getOpcode() == ISD::OR) + if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) + return N1; + // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N0Op0 = N0.getOperand(0); + APInt Mask = ~N1C->getAPIntValue(); + Mask = Mask.trunc(N0Op0.getValueSizeInBits()); + if (DAG.MaskedValueIsZero(N0Op0, Mask)) { + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), + N0.getValueType(), N0Op0); + + // Replace uses of the AND with uses of the Zero extend node. + CombineTo(N, Zext); + + // We actually want to replace all uses of the any_extend with the + // zero_extend, to avoid duplicating things. This will later cause this + // AND to be folded. + CombineTo(N0.getNode(), Zext); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must + // already be zero by virtue of the width of the base type of the load. + // + // the 'X' node here can either be nothing or an extract_vector_elt to catch + // more cases. 
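// Illustrative aside (not from the imported LLVM sources): the
// (and (or x, C), D) -> D fold above holds because every bit of D is already
// set in C whenever (C & D) == D, so the OR forces those bits on regardless
// of x and the AND then keeps exactly them. A compile-time spot check in
// plain C++:
static_assert((0xFF00u & 0x0F00u) == 0x0F00u &&
              ((0x12345678u | 0xFF00u) & 0x0F00u) == 0x0F00u,
              "and of (or x, C) with D is D when C covers D");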
+ if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + N0.getOperand(0).getOpcode() == ISD::LOAD) || + N0.getOpcode() == ISD::LOAD) { + LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? + N0 : N0.getOperand(0) ); + + // Get the constant (if applicable) the zero'th operand is being ANDed with. + // This can be a pure constant or a vector splat, in which case we treat the + // vector as a scalar and use the splat value. + APInt Constant = APInt::getNullValue(1); + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { + Constant = C->getAPIntValue(); + } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, + SplatBitSize, HasAnyUndefs); + if (IsSplat) { + // Undef bits can contribute to a possible optimisation if set, so + // set them. + SplatValue |= SplatUndef; + + // The splat value may be something like "0x00FFFFFF", which means 0 for + // the first vector value and FF for the rest, repeating. We need a mask + // that will apply equally to all members of the vector, so AND all the + // lanes of the constant together. + EVT VT = Vector->getValueType(0); + unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); + + // If the splat value has been compressed to a bitlength lower + // than the size of the vector lane, we need to re-expand it to + // the lane size. + if (BitWidth > SplatBitSize) + for (SplatValue = SplatValue.zextOrTrunc(BitWidth); + SplatBitSize < BitWidth; + SplatBitSize = SplatBitSize * 2) + SplatValue |= SplatValue.shl(SplatBitSize); + + // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a + // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. + if (SplatBitSize % BitWidth == 0) { + Constant = APInt::getAllOnesValue(BitWidth); + for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) + Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); + } + } + } + + // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is + // actually legal and isn't going to get expanded, else this is a false + // optimisation. + bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, + Load->getValueType(0), + Load->getMemoryVT()); + + // Resize the constant to the same size as the original memory access before + // extension. If it is still the AllOnesValue then this AND is completely + // unneeded. + Constant = + Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); + + bool B; + switch (Load->getExtensionType()) { + default: B = false; break; + case ISD::EXTLOAD: B = CanZextLoadProfitably; break; + case ISD::ZEXTLOAD: + case ISD::NON_EXTLOAD: B = true; break; + } + + if (B && Constant.isAllOnesValue()) { + // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to + // preserve semantics once we get rid of the AND. + SDValue NewLoad(Load, 0); + if (Load->getExtensionType() == ISD::EXTLOAD) { + NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, + Load->getValueType(0), SDLoc(Load), + Load->getChain(), Load->getBasePtr(), + Load->getOffset(), Load->getMemoryVT(), + Load->getMemOperand()); + // Replace uses of the EXTLOAD with the new ZEXTLOAD. + if (Load->getNumValues() == 3) { + // PRE/POST_INC loads have 3 values. 
+ SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), + NewLoad.getValue(2) }; + CombineTo(Load, To, 3, true); + } else { + CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); + } + } + + // Fold the AND away, taking care not to fold to the old load node if we + // replaced it. + CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (and (load x), 255) -> (zextload x, i8) + // fold (and (extload x, i16), 255) -> (zextload x, i8) + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast<LoadSDNode>(N0.getOperand(0)) + : cast<LoadSDNode>(N0); + if (LN0->getExtensionType() != ISD::SEXTLOAD && + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { + auto NarrowLoad = false; + EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) { + if (!NarrowLoad) { + SDValue NewLoad = + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(LN0, NewLoad, NewLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } else { + EVT PtrType = LN0->getOperand(1).getValueType(); + + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); + + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + if (DAG.getDataLayout().isBigEndian()) { + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + SDLoc DL(LN0); + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, + NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } + + AddToWorklist(NewPtr.getNode()); + + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, + LN0->getChain(), NewPtr, + LN0->getPointerInfo(), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Alignment, LN0->getAAInfo()); + AddToWorklist(N); + CombineTo(LN0, Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + } + + if (SDValue Combined = visitANDLike(N0, N1, N)) + return Combined; + + // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; + + // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) + // fold (and (sra)) -> (and (srl)) when possible. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (zext_inreg (extload x)) -> (zextload x) + if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. 
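// Illustrative aside (not from the imported LLVM sources): the zext_inreg
// folds in this function drop the AND when the bits it would clear are
// already known to be zero, which is exactly what a zero-extending load
// guarantees. A compile-time spot check in plain C++ (0xAB standing in for a
// value zero-extended from i8):
static_assert((0x000000ABu & 0xFFu) == 0x000000ABu,
              "masking a zero-extended value at its own width is a no-op");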
+ unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use + if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + // If we zero all the possible extended bits, then we can turn this into + // a zextload if we are running before legalize or the operation is legal. + unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); + if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, + BitWidth - MemVT.getScalarType().getSizeInBits())) && + ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + AddToWorklist(N); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) + if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return BSwap; + } + + return SDValue(); +} + +/// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 
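// Illustrative aside (not from the imported LLVM sources): the pattern
// matched by MatchBSwapHWordLow below amounts to the identity
// ((a & 0xFF) << 8) | ((a >> 8) & 0xFF) == bswap32(a) >> 16, i.e. the two low
// bytes swapped. A compile-time spot check for a = 0x12345678 in plain C++,
// with the bswap written out by hand:
static_assert((((0x12345678u & 0xFFu) << 8) | ((0x12345678u >> 8) & 0xFFu)) ==
                  (((0x12345678u << 24) | ((0x12345678u & 0xFF00u) << 8) |
                    ((0x12345678u >> 8) & 0xFF00u) | (0x12345678u >> 24)) >> 16),
              "low-halfword swap is bswap followed by a 16-bit shift right");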
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, + bool DemandHighBits) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) + bool LookPassAnd0 = false; + bool LookPassAnd1 = false; + if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) + std::swap(N0, N1); + if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() == ISD::AND) { + if (!N0.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01C || N01C->getZExtValue() != 0xFF00) + return SDValue(); + N0 = N0.getOperand(0); + LookPassAnd0 = true; + } + + if (N1.getOpcode() == ISD::AND) { + if (!N1.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N11C || N11C->getZExtValue() != 0xFF) + return SDValue(); + N1 = N1.getOperand(0); + LookPassAnd1 = true; + } + + if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) + std::swap(N0, N1); + if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) + return SDValue(); + if (!N0.getNode()->hasOneUse() || + !N1.getNode()->hasOneUse()) + return SDValue(); + + ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); + if (!N01C || !N11C) + return SDValue(); + if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) + return SDValue(); + + // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) + SDValue N00 = N0->getOperand(0); + if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { + if (!N00.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); + if (!N001C || N001C->getZExtValue() != 0xFF) + return SDValue(); + N00 = N00.getOperand(0); + LookPassAnd0 = true; + } + + SDValue N10 = N1->getOperand(0); + if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { + if (!N10.getNode()->hasOneUse()) + return SDValue(); + ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); + if (!N101C || N101C->getZExtValue() != 0xFF00) + return SDValue(); + N10 = N10.getOperand(0); + LookPassAnd1 = true; + } + + if (N00 != N10) + return SDValue(); + + // Make sure everything beyond the low halfword gets set to zero since the SRL + // 16 will clear the top bits. + unsigned OpSizeInBits = VT.getSizeInBits(); + if (DemandHighBits && OpSizeInBits > 16) { + // If the left-shift isn't masked out then the only way this is a bswap is + // if all bits beyond the low 8 are 0. In that case the entire pattern + // reduces to a left shift anyway: leave it for other parts of the combiner. + if (!LookPassAnd0) + return SDValue(); + + // However, if the right shift isn't masked out then it might be because + // it's not needed. See if we can spot that too. 
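// Illustrative aside (not from the imported LLVM sources): the 32-bit packed
// halfword byteswap recognised by MatchBSwapHWord below equals byte-swapping
// the word and then rotating it by 16 bits. A compile-time spot check for
// x = 0x12345678 in plain C++, with bswap(0x12345678) written out as
// 0x78563412 and the rotate spelled as two shifts:
static_assert((((0x12345678u & 0xFFu) << 8) | ((0x12345678u & 0xFF00u) >> 8) |
               ((0x12345678u & 0xFF0000u) << 8) |
               ((0x12345678u & 0xFF000000u) >> 8)) ==
                  ((0x78563412u << 16) | (0x78563412u >> 16)),
              "packed halfword bswap equals the byteswapped word rotated by 16");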
+ if (!LookPassAnd1 && + !DAG.MaskedValueIsZero( + N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) + return SDValue(); + } + + SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); + if (OpSizeInBits > 16) { + SDLoc DL(N); + Res = DAG.getNode(ISD::SRL, DL, VT, Res, + DAG.getConstant(OpSizeInBits - 16, DL, + getShiftAmountTy(VT))); + } + return Res; +} + +/// Return true if the specified node is an element that makes up a 32-bit +/// packed halfword byteswap. +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { + if (!N.getNode()->hasOneUse()) + return false; + + unsigned Opc = N.getOpcode(); + if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) + return false; + + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!N1C) + return false; + + unsigned Num; + switch (N1C->getZExtValue()) { + default: + return false; + case 0xFF: Num = 0; break; + case 0xFF00: Num = 1; break; + case 0xFF0000: Num = 2; break; + case 0xFF000000: Num = 3; break; + } + + // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). + SDValue N0 = N.getOperand(0); + if (Opc == ISD::AND) { + if (Num == 0 || Num == 2) { + // (x >> 8) & 0xff + // (x >> 8) & 0xff0000 + if (N0.getOpcode() != ISD::SRL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { + // (x << 8) & 0xff00 + // (x << 8) & 0xff000000 + if (N0.getOpcode() != ISD::SHL) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + } else if (Opc == ISD::SHL) { + // (x & 0xff) << 8 + // (x & 0xff0000) << 8 + if (Num != 0 && Num != 2) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } else { // Opc == ISD::SRL + // (x & 0xff00) >> 8 + // (x & 0xff000000) >> 8 + if (Num != 1 && Num != 3) + return false; + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); + if (!C || C->getZExtValue() != 8) + return false; + } + + if (Parts[Num]) + return false; + + Parts[Num] = N0.getOperand(0).getNode(); + return true; +} + +/// Match a 32-bit packed halfword bswap. 
That is +/// ((x & 0x000000ff) << 8) | +/// ((x & 0x0000ff00) >> 8) | +/// ((x & 0x00ff0000) << 8) | +/// ((x & 0xff000000) >> 8) +/// => (rotl (bswap x), 16) +SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { + if (!LegalOperations) + return SDValue(); + + EVT VT = N->getValueType(0); + if (VT != MVT::i32) + return SDValue(); + if (!TLI.isOperationLegal(ISD::BSWAP, VT)) + return SDValue(); + + // Look for either + // (or (or (and), (and)), (or (and), (and))) + // (or (or (or (and), (and)), (and)), (and)) + if (N0.getOpcode() != ISD::OR) + return SDValue(); + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + SDNode *Parts[4] = {}; + + if (N1.getOpcode() == ISD::OR && + N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { + // (or (or (and), (and)), (or (and), (and))) + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + SDValue N010 = N01.getOperand(0); + if (!isBSwapHWordElement(N010, Parts)) + return SDValue(); + SDValue N011 = N01.getOperand(1); + if (!isBSwapHWordElement(N011, Parts)) + return SDValue(); + } else { + // (or (or (or (and), (and)), (and)), (and)) + if (!isBSwapHWordElement(N1, Parts)) + return SDValue(); + if (!isBSwapHWordElement(N01, Parts)) + return SDValue(); + if (N00.getOpcode() != ISD::OR) + return SDValue(); + SDValue N000 = N00.getOperand(0); + if (!isBSwapHWordElement(N000, Parts)) + return SDValue(); + SDValue N001 = N00.getOperand(1); + if (!isBSwapHWordElement(N001, Parts)) + return SDValue(); + } + + // Make sure the parts are all coming from the same node. + if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) + return SDValue(); + + SDLoc DL(N); + SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, + SDValue(Parts[0], 0)); + + // Result of the bswap should be rotated by 16. If it's not legal, then + // do (x << 16) | (x >> 16). + SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) + return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); + if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) + return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); +} + +/// This contains all DAGCombine rules which reduce two values combined by +/// an Or operation to a single value \see visitANDLike(). +SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { + EVT VT = N1.getValueType(); + // fold (or x, undef) -> -1 + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { + EVT EltVT = VT.isVector() ? 
VT.getVectorElementType() : VT; + return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), + SDLoc(LocReference), VT); + } + // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) + SDValue LL, LR, RL, RR, CC0, CC1; + if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ + ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); + ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); + + if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { + // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) + // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) + if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ORNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); + } + // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) + // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) + if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), + LR.getValueType(), LL, RL); + AddToWorklist(ANDNode.getNode()); + return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); + } + } + // canonicalize equivalent to ll == rl + if (LL == RR && LR == RL) { + Op1 = ISD::getSetCCSwappedOperands(Op1); + std::swap(RL, RR); + } + if (LL == RL && LR == RR) { + bool isInteger = LL.getValueType().isInteger(); + ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); + if (Result != ISD::SETCC_INVALID && + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { + EVT CCVT = getSetCCResultType(LL.getValueType()); + if (N0.getValueType() == CCVT || + (!LegalOperations && N0.getValueType() == MVT::i1)) + return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), + LL, LR, Result); + } + } + } + + // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. + if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && + // Don't increase # computations. + (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + // We can only do this xform if we know that bits from X that are set in C2 + // but not in C1 are already zero. Likewise for Y. + if (const ConstantSDNode *N0O1C = + getAsNonOpaqueConstant(N0.getOperand(1))) { + if (const ConstantSDNode *N1O1C = + getAsNonOpaqueConstant(N1.getOperand(1))) { + // We can only do this xform if we know that bits from X that are set in + // C2 but not in C1 are already zero. Likewise for Y. + const APInt &LHSMask = N0O1C->getAPIntValue(); + const APInt &RHSMask = N1O1C->getAPIntValue(); + + if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && + DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(0), N1.getOperand(0)); + SDLoc DL(LocReference); + return DAG.getNode(ISD::AND, DL, VT, X, + DAG.getConstant(LHSMask | RHSMask, DL, VT)); + } + } + } + } + + // (or (and X, M), (and X, N)) -> (and X, (or M, N)) + if (N0.getOpcode() == ISD::AND && + N1.getOpcode() == ISD::AND && + N0.getOperand(0) == N1.getOperand(0) && + // Don't increase # computations. 
+ (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, + N0.getOperand(1), N1.getOperand(1)); + return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N1.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (or x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + + // fold (or x, -1) -> -1, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + // do not return N0, because undef node may exist in N0 + return DAG.getConstant( + APInt::getAllOnesValue( + N0.getValueType().getScalarType().getSizeInBits()), + SDLoc(N), N0.getValueType()); + if (ISD::isBuildVectorAllOnes(N1.getNode())) + // do not return N1, because undef node may exist in N1 + return DAG.getConstant( + APInt::getAllOnesValue( + N1.getValueType().getScalarType().getSizeInBits()), + SDLoc(N), N1.getValueType()); + + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) + // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) + // Do this only if the resulting shuffle is legal. + if (isa<ShuffleVectorSDNode>(N0) && + isa<ShuffleVectorSDNode>(N1) && + // Avoid folding a node with illegal type. + TLI.isTypeLegal(VT) && + N0->getOperand(1) == N1->getOperand(1) && + ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { + bool CanFold = true; + unsigned NumElts = VT.getVectorNumElements(); + const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); + const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); + // We construct two shuffle masks: + // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand + // and N1 as the second operand. + // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand + // and N0 as the second operand. + // We do this because OR is commutable and therefore there might be + // two ways to fold this node into a shuffle. + SmallVector<int,4> Mask1; + SmallVector<int,4> Mask2; + + for (unsigned i = 0; i != NumElts && CanFold; ++i) { + int M0 = SV0->getMaskElt(i); + int M1 = SV1->getMaskElt(i); + + // Both shuffle indexes are undef. Propagate Undef. + if (M0 < 0 && M1 < 0) { + Mask1.push_back(M0); + Mask2.push_back(M0); + continue; + } + + if (M0 < 0 || M1 < 0 || + (M0 < (int)NumElts && M1 < (int)NumElts) || + (M0 >= (int)NumElts && M1 >= (int)NumElts)) { + CanFold = false; + break; + } + + Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); + Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); + } + + if (CanFold) { + // Fold this sequence only if the resulting shuffle is 'legal'. 
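+        // Hypothetical example with NumElts == 4: if N0 is
+        // (shuffle A, Zero, <0,4,2,4>) and N1 is (shuffle B, Zero, <4,1,4,3>),
+        // the loop above produces Mask1 == <0,5,2,7> and Mask2 == <4,1,6,3>,
+        // i.e. (shuffle A, B, Mask1) and (shuffle B, A, Mask2) both compute
+        // <A0, B1, A2, B3>, which equals the original OR.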
+ if (TLI.isShuffleMaskLegal(Mask1, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), + N1->getOperand(0), &Mask1[0]); + if (TLI.isShuffleMaskLegal(Mask2, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), + N0->getOperand(0), &Mask2[0]); + } + } + } + + // fold (or c1, c2) -> c1|c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); + // fold (or x, 0) -> x + if (isNullConstant(N1)) + return N0; + // fold (or x, -1) -> -1 + if (isAllOnesConstant(N1)) + return N1; + // fold (or x, c) -> c iff (x & ~c) == 0 + if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) + return N1; + + if (SDValue Combined = visitORLike(N0, N1, N)) + return Combined; + + // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) + if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) + return BSwap; + if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) + return BSwap; + + // reassociate or + if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) + return ROR; + // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) + // iff (c1 & c2) == 0. + if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); + if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { + if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, + N1C, C1)) + return DAG.getNode( + ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); + return SDValue(); + } + } + // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; + + // See if this is some rotate idiom. + if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) + return SDValue(Rot, 0); + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// Match "(X shl/srl V1) & V2" where V2 may not be present. +static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { + if (Op.getOpcode() == ISD::AND) { + if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { + Mask = Op.getOperand(1); + Op = Op.getOperand(0); + } else { + return false; + } + } + + if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { + Shift = Op; + return true; + } + + return false; +} + +// Return true if we can prove that, whenever Neg and Pos are both in the +// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that +// for two opposing shifts shift1 and shift2 and a value X with OpBits bits: +// +// (or (shift1 X, Neg), (shift2 X, Pos)) +// +// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate +// in direction shift1 by Neg. The range [0, EltSize) means that we only need +// to consider shift amounts with defined behavior. +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { + // If EltSize is a power of 2 then: + // + // (a) (Pos == 0 ? 
0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) + // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). + // + // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check + // for the stronger condition: + // + // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] + // + // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) + // we can just replace Neg with Neg' for the rest of the function. + // + // In other cases we check for the even stronger condition: + // + // Neg == EltSize - Pos [B] + // + // for all Neg and Pos. Note that the (or ...) then invokes undefined + // behavior if Pos == 0 (and consequently Neg == EltSize). + // + // We could actually use [A] whenever EltSize is a power of 2, but the + // only extra cases that it would match are those uninteresting ones + // where Neg and Pos are never in range at the same time. E.g. for + // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) + // as well as (sub 32, Pos), but: + // + // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) + // + // always invokes undefined behavior for 32-bit X. + // + // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. + unsigned MaskLoBits = 0; + if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { + if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { + if (NegC->getAPIntValue() == EltSize - 1) { + Neg = Neg.getOperand(0); + MaskLoBits = Log2_64(EltSize); + } + } + } + + // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. + if (Neg.getOpcode() != ISD::SUB) + return false; + ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); + if (!NegC) + return false; + SDValue NegOp1 = Neg.getOperand(1); + + // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with + // Pos'. The truncation is redundant for the purpose of the equality. + if (MaskLoBits && Pos.getOpcode() == ISD::AND) + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + if (PosC->getAPIntValue() == EltSize - 1) + Pos = Pos.getOperand(0); + + // The condition we need is now: + // + // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask + // + // If NegOp1 == Pos then we need: + // + // EltSize & Mask == NegC & Mask + // + // (because "x & Mask" is a truncation and distributes through subtraction). + APInt Width; + if (Pos == NegOp1) + Width = NegC->getAPIntValue(); + + // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. + // Then the condition we want to prove becomes: + // + // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask + // + // which, again because "x & Mask" is a truncation, becomes: + // + // NegC & Mask == (EltSize - PosC) & Mask + // EltSize & Mask == (NegC + PosC) & Mask + else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) + Width = PosC->getAPIntValue() + NegC->getAPIntValue(); + else + return false; + } else + return false; + + // Now we just need to check that EltSize & Mask == Width & Mask. + if (MaskLoBits) + // EltSize & Mask is 0 since Mask is EltSize - 1. + return Width.getLoBits(MaskLoBits) == 0; + return Width == EltSize; +} + +// A subroutine of MatchRotate used once we have found an OR of two opposite +// shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces +// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the +// former being preferred if supported. 
InnerPos and InnerNeg are Pos and +// Neg with outer conversions stripped away. +SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, + SDValue Neg, SDValue InnerPos, + SDValue InnerNeg, unsigned PosOpcode, + unsigned NegOpcode, SDLoc DL) { + // fold (or (shl x, (*ext y)), + // (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) or (rotr x, (sub 32, y)) + // + // fold (or (shl x, (*ext (sub 32, y))), + // (srl x, (*ext y))) -> + // (rotr x, y) or (rotl x, (sub 32, y)) + EVT VT = Shifted.getValueType(); + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { + bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); + return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, + HasPos ? Pos : Neg).getNode(); + } + + return nullptr; +} + +// MatchRotate - Handle an 'or' of two operands. If this is one of the many +// idioms for rotate, and if the target supports rotation instructions, generate +// a rot[lr]. +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { + // Must be a legal type. Expanded 'n promoted things won't work with rotates. + EVT VT = LHS.getValueType(); + if (!TLI.isTypeLegal(VT)) return nullptr; + + // The target must have at least one rotate flavor. + bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); + bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); + if (!HasROTL && !HasROTR) return nullptr; + + // Match "(X shl/srl V1) & V2" where V2 may not be present. + SDValue LHSShift; // The shift. + SDValue LHSMask; // AND value if any. + if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) + return nullptr; // Not part of a rotate. + + SDValue RHSShift; // The shift. + SDValue RHSMask; // AND value if any. + if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) + return nullptr; // Not part of a rotate. + + if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) + return nullptr; // Not shifting the same value. + + if (LHSShift.getOpcode() == RHSShift.getOpcode()) + return nullptr; // Shifts must disagree. + + // Canonicalize shl to left side in a shl/srl pair. + if (RHSShift.getOpcode() == ISD::SHL) { + std::swap(LHS, RHS); + std::swap(LHSShift, RHSShift); + std::swap(LHSMask, RHSMask); + } + + unsigned EltSizeInBits = VT.getScalarSizeInBits(); + SDValue LHSShiftArg = LHSShift.getOperand(0); + SDValue LHSShiftAmt = LHSShift.getOperand(1); + SDValue RHSShiftArg = RHSShift.getOperand(0); + SDValue RHSShiftAmt = RHSShift.getOperand(1); + + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) + // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) + if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { + uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); + uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); + if ((LShVal + RShVal) != EltSizeInBits) + return nullptr; + + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, + LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + + // If there is an AND of either shifted operand, apply it to the result. 
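+    // Illustrative case (assuming i32): for
+    //   (or (and (shl x, 8), LHSMask), (and (srl x, 24), RHSMask))
+    // the rotate produced above is ANDed with
+    //   (LHSMask | 0x000000FF) & (RHSMask | 0xFFFFFF00),
+    // i.e. each mask is widened with ones over the bits supplied by the other
+    // half of the rotate, so only the originally-masked bits are constrained.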
+ if (LHSMask.getNode() || RHSMask.getNode()) { + APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); + SDValue Mask = DAG.getConstant(AllBits, DL, VT); + + if (LHSMask.getNode()) { + APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, LHSMask, + DAG.getConstant(RHSBits, DL, VT))); + } + if (RHSMask.getNode()) { + APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); + Mask = DAG.getNode(ISD::AND, DL, VT, Mask, + DAG.getNode(ISD::OR, DL, VT, RHSMask, + DAG.getConstant(LHSBits, DL, VT))); + } + + Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); + } + + return Rot.getNode(); + } + + // If there is a mask here, and we have a variable shift, we can't be sure + // that we're masking out the right stuff. + if (LHSMask.getNode() || RHSMask.getNode()) + return nullptr; + + // If the shift amount is sign/zext/any-extended just peel it off. + SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; + if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && + (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || + RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || + RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, + LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); + if (TryL) + return TryL; + + SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, + RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); + if (TryR) + return TryR; + + return nullptr; +} + +SDValue DAGCombiner::visitXOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + + // fold vector ops + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + } + + // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 
+ if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(N), VT); + // fold (xor x, undef) -> undef + if (N0.getOpcode() == ISD::UNDEF) + return N0; + if (N1.getOpcode() == ISD::UNDEF) + return N1; + // fold (xor c1, c2) -> c1^c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); + if (N0C && N1C) + return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); + // canonicalize constant to RHS + if (isConstantIntBuildVectorOrConstantInt(N0) && + !isConstantIntBuildVectorOrConstantInt(N1)) + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); + // fold (xor x, 0) -> x + if (isNullConstant(N1)) + return N0; + // reassociate xor + if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) + return RXOR; + + // fold !(x cc y) -> (x !cc y) + SDValue LHS, RHS, CC; + if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { + bool isInt = LHS.getValueType().isInteger(); + ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + isInt); + + if (!LegalOperations || + TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { + switch (N0.getOpcode()) { + default: + llvm_unreachable("Unhandled SetCC Equivalent!"); + case ISD::SETCC: + return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); + case ISD::SELECT_CC: + return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), + N0.getOperand(3), NotCC); + } + } + } + + // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) + if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getNode()->hasOneUse() && + isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ + SDValue V = N0.getOperand(0); + SDLoc DL(N0); + V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, + DAG.getConstant(1, DL, V.getValueType())); + AddToWorklist(V.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); + } + + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc + if (isOneConstant(N1) && VT == MVT::i1 && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); + } + } + // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants + if (isAllOnesConstant(N1) && + (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { + unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS + AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); + } + } + // fold (xor (and x, y), y) -> (and (not x), y) + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0->getOperand(1) == N1) { + SDValue X = N0->getOperand(0); + SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); + AddToWorklist(NotX.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); + } + // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) + if (N1C && N0.getOpcode() == ISD::XOR) { + if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), + DAG.getConstant(N1C->getAPIntValue() ^ + N00C->getAPIntValue(), DL, VT)); + } + if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), + DAG.getConstant(N1C->getAPIntValue() ^ + N01C->getAPIntValue(), DL, VT)); + } + } + // fold (xor x, x) -> 0 + if (N0 == N1) + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); + + // fold (xor (shl 1, x), -1) -> (rotl ~1, x) + // Here is a concrete example of this equivalence: + // i16 x == 14 + // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 + // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 + // + // => + // + // i16 ~1 == 0b1111111111111110 + // i16 rol(~1, 14) == 0b1011111111111111 + // + // Some additional tips to help conceptualize this transform: + // - Try to see the operation as placing a single zero in a value of all ones. + // - There exists no value for x which would allow the result to contain zero. + // - Values of x larger than the bitwidth are undefined and do not require a + // consistent result. + // - Pushing the zero left requires shifting one bits in from the right. + // A rotate left of ~1 is a nice way of achieving the desired result. + if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL + && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { + SDLoc DL(N); + return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), + N0.getOperand(1)); + } + + // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) + if (N0.getOpcode() == N1.getOpcode()) + if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) + return Tmp; + + // Simplify the expression using non-local knowledge. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +/// Handle transforms common to the three shifts, when the shift amount is a +/// constant. +SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { + SDNode *LHS = N->getOperand(0).getNode(); + if (!LHS->hasOneUse()) return SDValue(); + + // We want to pull some binops through shifts, so that we have (and (shift)) + // instead of (shift (and)), likewise for add, or, xor, etc. This sort of + // thing happens with address calculations, so it's important to canonicalize + // it. + bool HighBitSet = false; // Can we transform this if the high bit is set? + + switch (LHS->getOpcode()) { + default: return SDValue(); + case ISD::OR: + case ISD::XOR: + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + case ISD::AND: + HighBitSet = true; // We can only transform sra if the high bit is set. 
+ break; + case ISD::ADD: + if (N->getOpcode() != ISD::SHL) + return SDValue(); // only shl(add) not sr[al](add). + HighBitSet = false; // We can only transform sra if the high bit is clear. + break; + } + + // We require the RHS of the binop to be a constant and not opaque as well. + ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); + if (!BinOpCst) return SDValue(); + + // FIXME: disable this unless the input to the binop is a shift by a constant. + // If it is not a shift, it pessimizes some common cases like: + // + // void foo(int *X, int i) { X[i & 1235] = 1; } + // int bar(int *X, int i) { return X[i & 255]; } + SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); + if ((BinOpLHSVal->getOpcode() != ISD::SHL && + BinOpLHSVal->getOpcode() != ISD::SRA && + BinOpLHSVal->getOpcode() != ISD::SRL) || + !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) + return SDValue(); + + EVT VT = N->getValueType(0); + + // If this is a signed shift right, and the high bit is modified by the + // logical operation, do not perform the transformation. The highBitSet + // boolean indicates the value of the high bit of the constant which would + // cause it to be modified for this operation. + if (N->getOpcode() == ISD::SRA) { + bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); + if (BinOpRHSSignSet != HighBitSet) + return SDValue(); + } + + if (!TLI.isDesirableToCommuteWithShift(LHS)) + return SDValue(); + + // Fold the constants, shifting the binop RHS by the shift amount. + SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), + N->getValueType(0), + LHS->getOperand(1), N->getOperand(1)); + assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); + + // Create the new shift. + SDValue NewShift = DAG.getNode(N->getOpcode(), + SDLoc(LHS->getOperand(0)), + VT, LHS->getOperand(0), N->getOperand(1)); + + // Create the new binop. + return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); +} + +SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { + assert(N->getOpcode() == ISD::TRUNCATE); + assert(N->getOperand(0).getOpcode() == ISD::AND); + + // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) + if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { + SDValue N01 = N->getOperand(0).getOperand(1); + + if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { + if (!N01C->isOpaque()) { + EVT TruncVT = N->getValueType(0); + SDValue N00 = N->getOperand(0).getOperand(0); + APInt TruncC = N01C->getAPIntValue(); + TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); + SDLoc DL(N); + + return DAG.getNode(ISD::AND, DL, TruncVT, + DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), + DAG.getConstant(TruncC, DL, TruncVT)); + } + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitRotate(SDNode *N) { + // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
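+  // A hypothetical instance: with x:i32 and y:i64,
+  //   (rotl x, (trunc:i32 (and y, 31)))
+  // becomes
+  //   (rotl x, (and (trunc:i32 y), 31))
+  // via the distributeTruncateThroughAnd helper; the constant is truncated to
+  // the narrower shift-amount type.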
+ if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && + N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); + if (NewOp1.getNode()) + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), NewOp1); + } + return SDValue(); +} + +SDValue DAGCombiner::visitSHL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarSizeInBits(); + + // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); + // If setcc produces all-one true value then: + // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) + if (N1CV && N1CV->isConstant()) { + if (N0.getOpcode() == ISD::AND) { + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); + + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, + N01CV, N1CV)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); + } + } else { + N1C = isConstOrConstSplat(N1); + } + } + } + + // fold (shl c1, c2) -> c1<<c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); + // fold (shl 0, x) -> 0 + if (isNullConstant(N0)) + return N0; + // fold (shl x, c >= size(x)) -> undef + if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) + return DAG.getUNDEF(VT); + // fold (shl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (shl undef, x) -> 0 + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getConstant(0, SDLoc(N), VT); + // if (shl x, c) is known to be zero, return 0 + if (DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, SDLoc(N), VT); + // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). + if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); + } + + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SHL) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); + } + } + + // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) + // For this to be valid, the second form must not preserve any of the bits + // that are shifted out by the inner shift in the first form. This means + // the outer shift size must be >= the number of bits added by the ext. + // As a corollary, we don't care what kind of ext it is. 
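+  // For instance (hypothetically, i32 extended to i64):
+  //   (shl (zext (shl x, 8)), 32) -> (shl (zext x), 40)
+  // The outer shift of 32 is >= 64 - 32, so every bit the inner i32 shift
+  // discarded is also discarded by the outer shift, making the forms equal.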
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) && + N0.getOperand(0).getOpcode() == ISD::SHL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0Op0.getValueType(); + uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); + if (c2 >= OpSizeInBits - InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SHL, DL, VT, + DAG.getNode(N0.getOpcode(), DL, VT, + N0Op0->getOperand(0)), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); + } + } + } + + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. + if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { + uint64_t c1 = N0Op0C1->getZExtValue(); + if (c1 < VT.getScalarSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDLoc DL(N); + SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), + NewOp0, + DAG.getConstant(c2, DL, CountVT)); + AddToWorklist(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + } + + // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 + // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 + if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && + cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t C1 = N0C1->getZExtValue(); + uint64_t C2 = N1C->getZExtValue(); + SDLoc DL(N); + if (C1 <= C2) + return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(C2 - C1, DL, N1.getValueType())); + return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), + DAG.getConstant(C1 - C2, DL, N1.getValueType())); + } + } + + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or + // (and (srl x, (sub c1, c2), MASK) + // Only fold this if the inner shift has no other uses -- if it does, folding + // this will increase the total number of instructions. 
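+  // A concrete (illustrative) i32 case:
+  //   (shl (srl x, 4), 6) -> (and (shl x, 2), 0xFFFFFFC0)
+  //   (shl (srl x, 6), 4) -> (and (srl x, 2), 0x3FFFFFF0)
+  // where the mask clears exactly the bit positions the original pair of
+  // shifts would have zeroed.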
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N0C1->getZExtValue(); + if (c1 < OpSizeInBits) { + uint64_t c2 = N1C->getZExtValue(); + APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); + SDValue Shift; + if (c2 > c1) { + Mask = Mask.shl(c2 - c1); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), + DAG.getConstant(c2 - c1, DL, N1.getValueType())); + } else { + Mask = Mask.lshr(c1 - c2); + SDLoc DL(N); + Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 - c2, DL, N1.getValueType())); + } + SDLoc DL(N0); + return DAG.getNode(ISD::AND, DL, VT, Shift, + DAG.getConstant(Mask, DL, VT)); + } + } + } + // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) + if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { + unsigned BitSize = VT.getScalarSizeInBits(); + SDLoc DL(N); + SDValue HiBitsMask = + DAG.getConstant(APInt::getHighBitsSet(BitSize, + BitSize - N1C->getZExtValue()), + DL, VT); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + HiBitsMask); + } + + // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) + // Variant of version done on multiply, except mul by a power of 2 is turned + // into a shift. + APInt Val; + if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isa<ConstantSDNode>(N0.getOperand(1)) || + isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { + SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); + SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); + } + + // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) + if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { + if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { + if (SDValue Folded = + DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); + } + } + + if (N1C && !N1C->isOpaque()) + if (SDValue NewSHL = visitShiftByConstant(N, N1C)) + return NewSHL; + + return SDValue(); +} + +SDValue DAGCombiner::visitSRA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + + // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + N1C = isConstOrConstSplat(N1); + } + + // fold (sra c1, c2) -> (sra c1, c2) + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); + // fold (sra 0, x) -> 0 + if (isNullConstant(N0)) + return N0; + // fold (sra -1, x) -> -1 + if (isAllOnesConstant(N0)) + return N0; + // fold (sra x, (setge c, size(x))) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (sra x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports + // sext_inreg. 
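+  // e.g. on i32 (an illustrative case), (sra (shl x, 24), 24) becomes
+  // (sign_extend_inreg x, i8), since only the low 32 - 24 = 8 bits of x
+  // are significant.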
+ if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { + unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); + if (VT.isVector()) + ExtVT = EVT::getVectorVT(*DAG.getContext(), + ExtVT, VT.getVectorNumElements()); + if ((!LegalOperations || + TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, + N0.getOperand(0), DAG.getValueType(ExtVT)); + } + + // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRA) { + if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { + unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); + if (Sum >= OpSizeInBits) + Sum = OpSizeInBits - 1; + SDLoc DL(N); + return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), + DAG.getConstant(Sum, DL, N1.getValueType())); + } + } + + // fold (sra (shl X, m), (sub result_size, n)) + // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for + // result_size - n != m. + // If truncate is free for the target sext(shl) is likely to result in better + // code. + if (N0.getOpcode() == ISD::SHL && N1C) { + // Get the two constanst of the shifts, CN0 = m, CN = n. + const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); + if (N01C) { + LLVMContext &Ctx = *DAG.getContext(); + // Determine what the truncate's result bitsize and type would be. + EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); + + if (VT.isVector()) + TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); + + // Determine the residual right-shift amount. + signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); + + // If the shift is not a no-op (in which case this should be just a sign + // extend already), the truncated to type is legal, sign_extend is legal + // on that type, and the truncate to that type is both legal and free, + // perform the transform. + if ((ShiftAmt > 0) && + TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && + TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && + TLI.isTruncateFree(VT, TruncVT)) { + + SDLoc DL(N); + SDValue Amt = DAG.getConstant(ShiftAmt, DL, + getShiftAmountTy(N0.getOperand(0).getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, + N0.getOperand(0), Amt); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, + Shift); + return DAG.getNode(ISD::SIGN_EXTEND, DL, + N->getValueType(0), Trunc); + } + } + } + + // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); + if (NewOp1.getNode()) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); + } + + // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) + // if c1 is equal to the number of bits the trunc removes + if (N0.getOpcode() == ISD::TRUNCATE && + (N0.getOperand(0).getOpcode() == ISD::SRL || + N0.getOperand(0).getOpcode() == ISD::SRA) && + N0.getOperand(0).hasOneUse() && + N0.getOperand(0).getOperand(1).hasOneUse() && + N1C) { + SDValue N0Op0 = N0.getOperand(0); + if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { + unsigned LargeShiftVal = LargeShift->getZExtValue(); + EVT LargeVT = N0Op0.getValueType(); + + if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { + SDLoc DL(N); + SDValue Amt = + DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL, + getShiftAmountTy(N0Op0.getOperand(0).getValueType())); + SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT, + N0Op0.getOperand(0), Amt); + return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA); + } + } + } + + // Simplify, based on bits shifted out of the LHS. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + + // If the sign bit is known to be zero, switch this to a SRL. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); + + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRA = visitShiftByConstant(N, N1C)) + return NewSRA; + + return SDValue(); +} + +SDValue DAGCombiner::visitSRL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N0.getValueType(); + unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + + // fold vector ops + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + if (VT.isVector()) { + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + N1C = isConstOrConstSplat(N1); + } + + // fold (srl c1, c2) -> c1 >>u c2 + ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); + if (N0C && N1C && !N1C->isOpaque()) + return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C); + // fold (srl 0, x) -> 0 + if (isNullConstant(N0)) + return N0; + // fold (srl x, c >= size(x)) -> undef + if (N1C && N1C->getZExtValue() >= OpSizeInBits) + return DAG.getUNDEF(VT); + // fold (srl x, 0) -> x + if (N1C && N1C->isNullValue()) + return N0; + // if (srl x, c) is known to be zero, return 0 + if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), + APInt::getAllOnesValue(OpSizeInBits))) + return DAG.getConstant(0, SDLoc(N), VT); + + // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) + if (N1C && N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { + uint64_t c1 = N01C->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + SDLoc DL(N); + if (c1 + c2 >= OpSizeInBits) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), + DAG.getConstant(c1 + c2, DL, N1.getValueType())); + } + } + + // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) + if (N1C && N0.getOpcode() == ISD::TRUNCATE && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); + uint64_t c2 = N1C->getZExtValue(); + EVT InnerShiftVT = N0.getOperand(0).getValueType(); + EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); + 
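+    // Hypothetical i64 -> i32 example of the fold guarded below:
+    //   (srl (trunc:i32 (srl x:i64, 32)), 8) -> (trunc:i32 (srl x, 40))
+    // which is only valid because 32 (c1) + 32 (OpSizeInBits) equals the
+    // 64-bit inner shift width.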
uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); + // This is only valid if the OpSizeInBits + c1 = size of inner shift. + if (c1 + OpSizeInBits == InnerShiftSize) { + SDLoc DL(N0); + if (c1 + c2 >= InnerShiftSize) + return DAG.getConstant(0, DL, VT); + return DAG.getNode(ISD::TRUNCATE, DL, VT, + DAG.getNode(ISD::SRL, DL, InnerShiftVT, + N0.getOperand(0)->getOperand(0), + DAG.getConstant(c1 + c2, DL, + ShiftCountVT))); + } + } + + // fold (srl (shl x, c), c) -> (and x, cst2) + if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { + unsigned BitSize = N0.getScalarValueSizeInBits(); + if (BitSize <= 64) { + uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), + DAG.getConstant(~0ULL >> ShAmt, DL, VT)); + } + } + + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) + if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { + // Shifting in all undef bits? + EVT SmallVT = N0.getOperand(0).getValueType(); + unsigned BitSize = SmallVT.getScalarSizeInBits(); + if (N1C->getZExtValue() >= BitSize) + return DAG.getUNDEF(VT); + + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); + SDLoc DL0(N0); + SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT, + N0.getOperand(0), + DAG.getConstant(ShiftAmt, DL0, + getShiftAmountTy(SmallVT))); + AddToWorklist(SmallShift.getNode()); + APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift), + DAG.getConstant(Mask, DL, VT)); + } + } + + // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign + // bit, which is unmodified by sra. + if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { + if (N0.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); + } + + // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). + if (N1C && N0.getOpcode() == ISD::CTLZ && + N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { + APInt KnownZero, KnownOne; + DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); + + // If any of the input bits are KnownOne, then the input couldn't be all + // zeros, thus the result of the srl will always be zero. + if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT); + + // If all of the bits input the to ctlz node are known to be zero, then + // the result of the ctlz is "32" and the result of the shift is one. + APInt UnknownBits = ~KnownZero; + if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT); + + // Otherwise, check to see if there is exactly one bit input to the ctlz. + if ((UnknownBits & (UnknownBits - 1)) == 0) { + // Okay, we know that only that the single bit specified by UnknownBits + // could be set on input to the CTLZ node. If this bit is set, the SRL + // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair + // to an SRL/XOR pair, which is likely to simplify more. + unsigned ShAmt = UnknownBits.countTrailingZeros(); + SDValue Op = N0.getOperand(0); + + if (ShAmt) { + SDLoc DL(N0); + Op = DAG.getNode(ISD::SRL, DL, VT, Op, + DAG.getConstant(ShAmt, DL, + getShiftAmountTy(Op.getValueType()))); + AddToWorklist(Op.getNode()); + } + + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT, + Op, DAG.getConstant(1, DL, VT)); + } + } + + // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 
+ if (N1.getOpcode() == ISD::TRUNCATE && + N1.getOperand(0).getOpcode() == ISD::AND) { + if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); + } + + // fold operands of srl based on knowledge that the low bits are not + // demanded. + if (N1C && SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + if (N1C && !N1C->isOpaque()) + if (SDValue NewSRL = visitShiftByConstant(N, N1C)) + return NewSRL; + + // Attempt to convert a srl of a load into a narrower zero-extending load. + if (SDValue NarrowLoad = ReduceLoadWidth(N)) + return NarrowLoad; + + // Here is a common situation. We want to optimize: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // However when after the source operand of SRL is optimized into AND, the SRL + // itself may not be optimized further. Look for it and add the BRCOND into + // the worklist. + if (N->hasOneUse()) { + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorklist(Use); + else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { + // Also look pass the truncate. + Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorklist(Use); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBSWAP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (bswap c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0); + // fold (bswap (bswap x)) -> x + if (N0.getOpcode() == ISD::BSWAP) + return N0->getOperand(0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctlz_zero_undef c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (cttz_zero_undef c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); + return SDValue(); +} + +SDValue DAGCombiner::visitCTPOP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ctpop c1) -> c2 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); + return SDValue(); +} + + +/// \brief Generate Min/Max node +static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, + SDValue True, SDValue False, + ISD::CondCode CC, const TargetLowering &TLI, + SelectionDAG &DAG) { + if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) + return SDValue(); + + switch (CC) { + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + case 
ISD::SETULT: + case ISD::SETULE: { + unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: { + unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; + if (TLI.isOperationLegal(Opcode, VT)) + return DAG.getNode(Opcode, DL, VT, LHS, RHS); + return SDValue(); + } + default: + return SDValue(); + } +} + +SDValue DAGCombiner::visitSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + EVT VT = N->getValueType(0); + EVT VT0 = N0.getValueType(); + + // fold (select C, X, X) -> X + if (N1 == N2) + return N1; + if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) { + // fold (select true, X, Y) -> X + // fold (select false, X, Y) -> Y + return !N0C->isNullValue() ? N1 : N2; + } + // fold (select C, 1, X) -> (or C, X) + if (VT == MVT::i1 && isOneConstant(N1)) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. + if (VT.isInteger() && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && + isNullConstant(N1) && isOneConstant(N2)) { + SDValue XORNode; + if (VT == VT0) { + SDLoc DL(N); + return DAG.getNode(ISD::XOR, DL, VT0, + N0, DAG.getConstant(1, DL, VT0)); + } + SDLoc DL0(N0); + XORNode = DAG.getNode(ISD::XOR, DL0, VT0, + N0, DAG.getConstant(1, DL0, VT0)); + AddToWorklist(XORNode.getNode()); + if (VT.bitsGT(VT0)) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); + } + // fold (select C, 0, X) -> (and (not C), X) + if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) { + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); + AddToWorklist(NOTNode.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); + } + // fold (select C, X, 1) -> (or (not C), X) + if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) { + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); + AddToWorklist(NOTNode.getNode()); + return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); + } + // fold (select C, X, 0) -> (and C, X) + if (VT == MVT::i1 && isNullConstant(N2)) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + // fold (select X, X, Y) -> (or X, Y) + // fold (select X, 1, Y) -> (or X, Y) + if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1))) + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); + // fold (select X, Y, X) -> (and X, Y) + // fold (select X, Y, 0) -> (and X, Y) + if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2))) + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); + + // If we can fold this based on the true/false value, do so. 
+ if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + + if (VT0 == MVT::i1) { + // The code in this block deals with the following 2 equivalences: + // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y)) + // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y) + // The target can specify its prefered form with the + // shouldNormalizeToSelectSequence() callback. However we always transform + // to the right anyway if we find the inner select exists in the DAG anyway + // and we always transform to the left side if we know that we can further + // optimize the combination of the conditions. + bool normalizeToSequence + = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT); + // select (and Cond0, Cond1), X, Y + // -> select Cond0, (select Cond1, X, Y), Y + if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, + InnerSelect, N2); + } + // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) + if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { + SDValue Cond0 = N0->getOperand(0); + SDValue Cond1 = N0->getOperand(1); + SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), + N1.getValueType(), Cond1, N1, N2); + if (normalizeToSequence || !InnerSelect.use_empty()) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, + InnerSelect); + } + + // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y + if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) { + SDValue N1_0 = N1->getOperand(0); + SDValue N1_1 = N1->getOperand(1); + SDValue N1_2 = N1->getOperand(2); + if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { + // Create the actual and node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), + N0, N1_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, + N1_1, N2); + } + // Otherwise see if we can optimize the "and" to a better pattern. + if (SDValue Combined = visitANDLike(N0, N1_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1_1, N2); + } + } + // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y + if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) { + SDValue N2_0 = N2->getOperand(0); + SDValue N2_1 = N2->getOperand(1); + SDValue N2_2 = N2->getOperand(2); + if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { + // Create the actual or node if we can generate good code for it. + if (!normalizeToSequence) { + SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), + N0, N2_0); + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, + N1, N2_2); + } + // Otherwise see if we can optimize to a better pattern. + if (SDValue Combined = visitORLike(N0, N2_0, N)) + return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, + N1, N2_2); + } + } + } + + // fold selects based on a setcc into other things, such as min/max/abs + if (N0.getOpcode() == ISD::SETCC) { + // select x, y (fcmp lt x, y) -> fminnum x, y + // select x, y (fcmp gt x, y) -> fmaxnum x, y + // + // This is OK if we don't care about what happens if either operand is a + // NaN. 
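+    // combineMinNumMaxNum also accepts the operand-swapped form, e.g.
+    //   (select (setcc ogt x, y), y, x) -> (fminnum x, y)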
+ // + + // FIXME: Instead of testing for UnsafeFPMath, this should be checking for + // no signed zeros as well as no nans. + const TargetOptions &Options = DAG.getTarget().Options; + if (Options.UnsafeFPMath && + VT.isFloatingPoint() && N0.hasOneUse() && + DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + + if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), N1, N2, CC, + TLI, DAG)) + return FMinMax; + } + + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), + N1, N2, N0.getOperand(2)); + return SimplifySelect(SDLoc(N), N0, N1, N2); + } + + return SDValue(); +} + +static +std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. + SDValue Lo, Hi, LL, LH, RL, RH; + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); + + return std::make_pair(Lo, Hi); +} + +// This function assumes all the vselect's arguments are CONCAT_VECTOR +// nodes and that the condition is a BV of ConstantSDNodes (or undefs). +static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + EVT VT = N->getValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about + // binary ones here. + if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) + return SDValue(); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF + // After we find an UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, dl, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ? 
RHS->getOperand(1) : LHS->getOperand(1)); +} + +SDValue DAGCombiner::visitMSCATTER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue Mask = MSC->getMask(); + SDValue Data = MSC->getValue(); + SDLoc DL(N); + + // If the MSCATTER data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0)); + + SDValue Chain = MSC->getChain(); + + EVT MemoryVT = MSC->getMemoryVT(); + unsigned Alignment = MSC->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue BasePtr = MSC->getBasePtr(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MSC->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MSC->getAAInfo(), MSC->getRanges()); + + SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGCombiner::visitMSTORE(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + SDValue Data = MST->getValue(); + SDLoc DL(N); + + // If the MSTORE data type requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (Mask.getOpcode() == ISD::SETCC) { + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); + + SDValue Chain = MST->getChain(); + SDValue Ptr = MST->getBasePtr(); + + EVT MemoryVT = MST->getMemoryVT(); + unsigned Alignment = MST->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 
+ Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, MST->getAAInfo(), MST->getRanges()); + + Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + MST->isTruncatingStore()); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MST->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, MST->getAAInfo(), + MST->getRanges()); + + Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + MST->isTruncatingStore()); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); + } + return SDValue(); +} + +SDValue DAGCombiner::visitMGATHER(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N); + SDValue Mask = MGT->getMask(); + SDLoc DL(N); + + // If the MGATHER result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() != ISD::SETCC) + return SDValue(); + + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MGT->getValue(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); + + SDValue Chain = MGT->getChain(); + EVT MemoryVT = MGT->getMemoryVT(); + unsigned Alignment = MGT->getOriginalAlignment(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue BasePtr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue IndexLo, IndexHi; + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo }; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo, + MMO); + + SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi, + MMO); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain); + + SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { GatherRes, Chain }; + return DAG.getMergeValues(RetOps, DL); +} + +SDValue DAGCombiner::visitMLOAD(SDNode *N) { + + if (Level >= AfterLegalizeTypes) + return SDValue(); + + MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); + SDValue Mask = MLD->getMask(); + SDLoc DL(N); + + // If the MLOAD result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + + if (Mask.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue MaskLo, MaskHi, Lo, Hi; + std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); + + SDValue Src0 = MLD->getSrc0(); + SDValue Src0Lo, Src0Hi; + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Chain = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + EVT MemoryVT = MLD->getMemoryVT(); + unsigned Alignment = MLD->getOriginalAlignment(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ISD::NON_EXTLOAD); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ISD::NON_EXTLOAD); + + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); + + SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + + SDValue RetOps[] = { LoadRes, Chain }; + return DAG.getMergeValues(RetOps, DL); + } + return SDValue(); +} + +SDValue DAGCombiner::visitVSELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDLoc DL(N); + + // Canonicalize integer abs. 
+ // vselect (setg[te] X, 0), X, -X -> + // vselect (setgt X, -1), X, -X -> + // vselect (setl[te] X, 0), -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N0.getOpcode() == ISD::SETCC) { + SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + bool isAbs = false; + bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); + + if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && + N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); + else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && + N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) + isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); + + if (isAbs) { + EVT VT = LHS.getValueType(); + SDValue Shift = DAG.getNode( + ISD::SRA, DL, VT, LHS, + DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT)); + SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); + } + } + + if (SimplifySelectOps(N, N1, N2)) + return SDValue(N, 0); // Don't revisit N. + + // If the VSELECT result requires splitting and the mask is provided by a + // SETCC, then split both nodes and its operands before legalization. This + // prevents the type legalizer from unrolling SETCC into scalar comparisons + // and enables future optimizations (e.g. min/max pattern matching on X86). + if (N0.getOpcode() == ISD::SETCC) { + EVT VT = N->getValueType(0); + + // Check if any splitting is required. + if (TLI.getTypeAction(*DAG.getContext(), VT) != + TargetLowering::TypeSplitVector) + return SDValue(); + + SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; + std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); + + Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); + Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); + + // Add the new VSELECT nodes to the work list in case they need to be split + // again. + AddToWorklist(Lo.getNode()); + AddToWorklist(Hi.getNode()); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); + } + + // Fold (vselect (build_vector all_ones), N1, N2) -> N1 + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + // Fold (vselect (build_vector all_zeros), N1, N2) -> N2 + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N2; + + // The ConvertSelectToConcatVector function is assuming both the above + // checks for (vselect (build_vector all{ones,zeros) ...) have been made + // and addressed. 
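+  // For example, with Cond == (build_vector 0, 0, 1, 1):
+  //   (vselect Cond, (concat_vectors A, B), (concat_vectors C, D))
+  //     -> (concat_vectors C, B)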
+ if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N2.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + if (SDValue CV = ConvertSelectToConcatVector(N, DAG)) + return CV; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + SDValue N4 = N->getOperand(4); + ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); + + // fold select_cc lhs, rhs, x, x, cc -> x + if (N2 == N3) + return N2; + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), + N0, N1, CC, SDLoc(N), false); + if (SCC.getNode()) { + AddToWorklist(SCC.getNode()); + + if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { + if (!SCCC->isNullValue()) + return N2; // cond always true -> true val + else + return N3; // cond always false -> false val + } else if (SCC->getOpcode() == ISD::UNDEF) { + // When the condition is UNDEF, just return the first operand. This is + // coherent the DAG creation, no setcc node is created in this case + return N2; + } else if (SCC.getOpcode() == ISD::SETCC) { + // Fold to a simpler select_cc + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), N2, N3, + SCC.getOperand(2)); + } + } + + // If we can fold this based on the true/false value, do so. + if (SimplifySelectOps(N, N2, N3)) + return SDValue(N, 0); // Don't revisit N. + + // fold select_cc into other things, such as min/max/abs + return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); +} + +SDValue DAGCombiner::visitSETCC(SDNode *N) { + return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), + cast<CondCodeSDNode>(N->getOperand(2))->get(), + SDLoc(N)); +} + +SDValue DAGCombiner::visitSETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + + // If Carry is false, fold to a regular SETCC. + if (Carry.getOpcode() == ISD::CARRY_FALSE) + return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond); + + return SDValue(); +} + +/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or +/// a build_vector of constants. +/// This function is called by the DAGCombiner when visiting sext/zext/aext +/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). +/// Vector extends are not folded if operations are legal; this is to +/// avoid introducing illegal build_vector dag nodes. 
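+/// For example, (v2i32 (zext (v2i16 build_vector 1, 2))) can be folded into
+/// (v2i32 build_vector 1, 2).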
+static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, + SelectionDAG &DAG, bool LegalTypes, + bool LegalOperations) { + unsigned Opcode = N->getOpcode(); + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || + Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + && "Expected EXTEND dag node in input!"); + + // fold (sext c1) -> c1 + // fold (zext c1) -> c1 + // fold (aext c1) -> c1 + if (isa<ConstantSDNode>(N0)) + return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); + + // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) + // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) + EVT SVT = VT.getScalarType(); + if (!(VT.isVector() && + (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) + return nullptr; + + // We can fold this node into a build_vector. + unsigned VTBits = SVT.getSizeInBits(); + unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); + SmallVector<SDValue, 8> Elts; + unsigned NumElts = VT.getVectorNumElements(); + SDLoc DL(N); + + for (unsigned i=0; i != NumElts; ++i) { + SDValue Op = N0->getOperand(i); + if (Op->getOpcode() == ISD::UNDEF) { + Elts.push_back(DAG.getUNDEF(SVT)); + continue; + } + + SDLoc DL(Op); + // Get the constant value and if needed trunc it to the size of the type. + // Nodes like build_vector might have constants wider than the scalar type. + APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits); + if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG) + Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT)); + else + Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT)); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); +} + +// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" +// transformation. Returns true if extension are possible and the above +// mentioned transformation is profitable. +static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, + unsigned ExtOpc, + SmallVectorImpl<SDNode *> &ExtendNodes, + const TargetLowering &TLI) { + bool HasCopyToRegUses = false; + bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + SDNode *User = *UI; + if (User == N) + continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; + // FIXME: Only extend SETCC N, N and SETCC N, c for now. + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { + ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); + if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) + // Sign bits will be lost after a zext. + return false; + bool Add = false; + for (unsigned i = 0; i != 2; ++i) { + SDValue UseOp = User->getOperand(i); + if (UseOp == N0) + continue; + if (!isa<ConstantSDNode>(UseOp)) + return false; + Add = true; + } + if (Add) + ExtendNodes.push_back(User); + continue; + } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. 
+ if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; + } + + if (HasCopyToRegUses) { + bool BothLiveOut = false; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; + } + } + if (BothLiveOut) + // Both unextended and extended values are live out. There had better be + // a good reason for the transformation. + return ExtendNodes.size(); + } + return true; +} + +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, + ISD::NodeType ExtType) { + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector<SDValue, 4> Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops)); + } +} + +// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?). +SDValue DAGCombiner::CombineExtLoad(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT DstVT = N->getValueType(0); + EVT SrcVT = N0.getValueType(); + + assert((N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::ZERO_EXTEND) && + "Unexpected node type (not an extend)!"); + + // fold (sext (load x)) to multiple smaller sextloads; same for zext. + // For example, on a target with legal v4i32, but illegal v8i32, turn: + // (v8i32 (sext (v8i16 (load x)))) + // into: + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))) + // Where uses of the original load, i.e.: + // (v8i16 (load x)) + // are replaced with: + // (v8i16 (truncate + // (v8i32 (concat_vectors (v4i32 (sextload x)), + // (v4i32 (sextload (x + 16))))))) + // + // This combine is only applicable to illegal, but splittable, vectors. + // All legal types, and illegal non-vector types, are handled elsewhere. + // This combine is controlled by TargetLowering::isVectorLoadExtDesirable. + // + if (N0->getOpcode() != ISD::LOAD) + return SDValue(); + + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + + if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) || + !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() || + !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0))) + return SDValue(); + + SmallVector<SDNode *, 4> SetCCs; + if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI)) + return SDValue(); + + ISD::LoadExtType ExtType = + N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD; + + // Try to split the vector types to get down to legal types. 
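+  // e.g. for the v8i16 -> v8i32 case above with legal v4i32, one halving
+  // step yields v4i16 -> v4i32, for which the extending load may be legal.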
+ EVT SplitSrcVT = SrcVT; + EVT SplitDstVT = DstVT; + while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) && + SplitSrcVT.getVectorNumElements() > 1) { + SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first; + SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first; + } + + if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT)) + return SDValue(); + + SDLoc DL(N); + const unsigned NumSplits = + DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements(); + const unsigned Stride = SplitSrcVT.getStoreSize(); + SmallVector<SDValue, 4> Loads; + SmallVector<SDValue, 4> Chains; + + SDValue BasePtr = LN0->getBasePtr(); + for (unsigned Idx = 0; Idx < NumSplits; Idx++) { + const unsigned Offset = Idx * Stride; + const unsigned Align = MinAlign(LN0->getAlignment(), Offset); + + SDValue SplitLoad = DAG.getExtLoad( + ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr, + LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, + LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(), + Align, LN0->getAAInfo()); + + BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Stride, DL, BasePtr.getValueType())); + + Loads.push_back(SplitLoad.getValue(0)); + Chains.push_back(SplitLoad.getValue(1)); + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads); + + CombineTo(N, NewValue); + + // Replace uses of the original load (before extension) + // with a truncate of the concatenated sextloaded vectors. + SDValue Trunc = + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue); + CombineTo(N0.getNode(), Trunc, NewChain); + ExtendSetCCUses(SetCCs, Trunc, NewValue, DL, + (ISD::NodeType)N->getOpcode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! +} + +SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + // fold (sext (sext x)) -> (sext x) + // fold (sext (aext x)) -> (sext x) + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, + N0.getOperand(0)); + + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (sext (truncate (load x))) -> (sext (smaller load x)) + // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + // See if the value being truncated is already sign extended. If so, just + // eliminate the trunc/sext pair. + SDValue Op = N0.getOperand(0); + unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); + unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); + unsigned DestBits = VT.getScalarType().getSizeInBits(); + unsigned NumSignBits = DAG.ComputeNumSignBits(Op); + + if (OpBits == DestBits) { + // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign + // bits, it is already ready. + if (NumSignBits > DestBits-MidBits) + return Op; + } else if (OpBits < DestBits) { + // Op is i32, Mid is i8, and Dest is i64. 
If Op has more than 24 sign + // bits, just sext from i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); + } else { + // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign + // bits, just truncate to i32. + if (NumSignBits > OpBits-MidBits) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + } + + // fold (sext (truncate x)) -> (sextinreg x). + if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, + N0.getValueType())) { + if (OpBits < DestBits) + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); + else if (OpBits > DestBits) + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, + DAG.getValueType(N0.getValueType())); + } + } + + // fold (sext (load x)) -> (sext (truncate (sextload x))) + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (sext (load x)) to multiple smaller sextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + + // fold (sext (sextload x)) -> (sext (truncate (sextload x))) + // fold (sext ( extload x)) -> (sext (truncate (sextload x))) + if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } + + // fold (sext (and/or/xor (load x), cst)) -> + // (and/or/xor (sextload x), (sext cst)) + if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, + SetCCs, TLI); + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.sext(VT.getSizeInBits()); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + SDLoc(N0.getOperand(0)), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, + ISD::SIGN_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); + // sext(setcc) -> sext_in_reg(vsetcc) for vectors. + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations && + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { + // On some architectures (such as SSE/NEON/etc) the SETCC result type is + // of the same size as the compared operands. Only optimize sext(setcc()) + // if this is the case. + EVT SVT = getSetCCResultType(N0VT); + + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == SVT.getSizeInBits()) + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); + if (SVT == MatchingVectorType) { + SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, + N0.getOperand(0), N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); + } + } + + // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0) + unsigned ElementWidth = VT.getScalarType().getSizeInBits(); + SDLoc DL(N); + SDValue NegOne = + DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT); + SDValue SCC = + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + NegOne, DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + + if (!VT.isVector()) { + EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType()); + if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) { + SDLoc DL(N); + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + SDValue SetCC = DAG.getSetCC(DL, SetCCVT, + N0.getOperand(0), N0.getOperand(1), CC); + return DAG.getSelect(DL, VT, SetCC, + NegOne, DAG.getConstant(0, DL, VT)); + } + } + } + + // fold (sext x) -> (zext x) if the sign bit is known zero. + if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && + DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); + + return SDValue(); +} + +// isTruncateOf - If N is a truncate of some other value, return true, record +// the value being truncated in Op and which of Op's bits are zero in KnownZero. +// This function computes KnownZero to avoid a duplicated call to +// computeKnownBits in the caller. +static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, + APInt &KnownZero) { + APInt KnownOne; + if (N->getOpcode() == ISD::TRUNCATE) { + Op = N->getOperand(0); + DAG.computeKnownBits(Op, KnownZero, KnownOne); + return true; + } + + if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || + cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) + return false; + + SDValue Op0 = N->getOperand(0); + SDValue Op1 = N->getOperand(1); + assert(Op0.getValueType() == Op1.getValueType()); + + if (isNullConstant(Op0)) + Op = Op1; + else if (isNullConstant(Op1)) + Op = Op0; + else + return false; + + DAG.computeKnownBits(Op, KnownZero, KnownOne); + + if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) + return false; + + return true; +} + +SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + // fold (zext (zext x)) -> (zext x) + // fold (zext (aext x)) -> (zext x) + if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, + N0.getOperand(0)); + + // fold (zext (truncate x)) -> (zext x) or + // (zext (truncate x)) -> (truncate x) + // This is valid when the truncated bits of x are already zero. + // FIXME: We should extend this to work for vectors too. 
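+  // e.g. if X is (and i32 Y, 255), then (zext (trunc X to i8) to i64) can be
+  // rewritten as (zext X to i64), since the bits removed by the truncate are
+  // already known to be zero.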
+ SDValue Op; + APInt KnownZero; + if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) { + APInt TruncatedBits = + (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ? + APInt(Op.getValueSizeInBits(), 0) : + APInt::getBitsSet(Op.getValueSizeInBits(), + N0.getValueSizeInBits(), + std::min(Op.getValueSizeInBits(), + VT.getSizeInBits())); + if (TruncatedBits == (KnownZero & TruncatedBits)) { + if (VT.bitsGT(Op.getValueType())) + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); + if (VT.bitsLT(Op.getValueType())) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + + return Op; + } + } + + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (truncate x)) -> (and x, mask) + if (N0.getOpcode() == ISD::TRUNCATE) { + // fold (zext (truncate (load x))) -> (zext (smaller load x)) + // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n))) + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode *oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + + EVT SrcVT = N0.getOperand(0).getValueType(); + EVT MinVT = N0.getValueType(); + + // Try to mask before the extension to avoid having to generate a larger mask, + // possibly over several sub-vectors. + if (SrcVT.bitsLT(VT)) { + if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) && + TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) { + SDValue Op = N0.getOperand(0); + Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + AddToWorklist(Op.getNode()); + return DAG.getZExtOrTrunc(Op, SDLoc(N), VT); + } + } + + if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) { + SDValue Op = N0.getOperand(0); + if (SrcVT.bitsLT(VT)) { + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } else if (SrcVT.bitsGT(VT)) { + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); + AddToWorklist(Op.getNode()); + } + return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType()); + } + } + + // Fold (zext (and (trunc x), cst)) -> (and x, cst), + // if either of the casts is not free. 
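+  // e.g. with X of type i64, (zext (and (trunc X), 7) to i64) -> (and X, 7)
+  // when either the truncate or the zero extension is not free.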
+ if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); + } + + // fold (zext (load x)) -> (zext (truncate (zextload x))) + // Only generate vector extloads when 1) they're legal, and 2) they are + // deemed desirable by the target. + if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + ((!LegalOperations && !VT.isVector() && + !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); + if (VT.isVector()) + DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0)); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (zext (load x)) to multiple smaller zextloads. + // Only on illegal but splittable vectors. + if (SDValue ExtLoad = CombineExtLoad(N)) + return ExtLoad; + + // fold (zext (and/or/xor (load x), cst)) -> + // (and/or/xor (zextload x), (zext cst)) + // Unless (and (load x) cst) will match as a zextload already and has + // additional users. 
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || + N0.getOpcode() == ISD::XOR) && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(1).getOpcode() == ISD::Constant && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) && + (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); + if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) { + if (N0.getOpcode() == ISD::AND) { + auto *AndC = cast<ConstantSDNode>(N0.getOperand(1)); + auto NarrowLoad = false; + EVT LoadResultTy = AndC->getValueType(0); + EVT ExtVT, LoadedVT; + if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT, + NarrowLoad)) + DoXform = false; + } + if (DoXform) + DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), + ISD::ZERO_EXTEND, SetCCs, TLI); + } + if (DoXform) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL(N); + SDValue And = DAG.getNode(N0.getOpcode(), DL, VT, + ExtLoad, DAG.getConstant(Mask, DL, VT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, + SDLoc(N0.getOperand(0)), + N0.getOperand(0).getValueType(), ExtLoad); + CombineTo(N, And); + CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL, + ISD::ZERO_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // fold (zext (zextload x)) -> (zext (truncate (zextload x))) + // fold (zext ( extload x)) -> (zext (truncate (zextload x))) + if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && + ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + EVT MemVT = LN0->getMemoryVT(); + if ((!LegalOperations && !LN0->isVolatile()) || + TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), + ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + if (!LegalOperations && VT.isVector() && + N0.getValueType().getVectorElementType() == MVT::i1) { + EVT N0VT = N0.getOperand(0).getValueType(); + if (getSetCCResultType(N0VT) == N0.getValueType()) + return SDValue(); + + // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors. + // Only do this before legalize for now. + EVT EltVT = VT.getVectorElementType(); + SDLoc DL(N); + SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(), + DAG.getConstant(1, DL, EltVT)); + if (VT.getSizeInBits() == N0VT.getSizeInBits()) + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ return DAG.getNode(ISD::AND, DL, VT, + DAG.getSetCC(DL, VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, + OneOps)); + + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/sign extend + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getNode(ISD::AND, DL, VT, + DAG.getSExtOrTrunc(VsetCC, DL, VT), + DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps)); + } + + // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); + SDValue SCC = + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) return SCC; + } + + // (zext (shl (zext x), cst)) -> (shl (zext x), cst) + if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) && + isa<ConstantSDNode>(N0.getOperand(1)) && + N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND && + N0.hasOneUse()) { + SDValue ShAmt = N0.getOperand(1); + unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue(); + if (N0.getOpcode() == ISD::SHL) { + SDValue InnerZExt = N0.getOperand(0); + // If the original shl may be shifting out bits, do not perform this + // transformation. + unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - + InnerZExt.getOperand(0).getValueType().getSizeInBits(); + if (ShAmtVal > KnownZeroBits) + return SDValue(); + } + + SDLoc DL(N); + + // Ensure that the shift amount is wide enough for the shifted value. + if (VT.getSizeInBits() >= 256) + ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); + + return DAG.getNode(N0.getOpcode(), DL, VT, + DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), + ShAmt); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + // fold (aext (aext x)) -> (aext x) + // fold (aext (zext x)) -> (zext x) + // fold (aext (sext x)) -> (sext x) + if (N0.getOpcode() == ISD::ANY_EXTEND || + N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); + + // fold (aext (truncate (load x))) -> (aext (smaller load x)) + // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) + if (N0.getOpcode() == ISD::TRUNCATE) { + if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) { + SDNode* oye = N0.getNode()->getOperand(0).getNode(); + if (NarrowLoad.getNode() != N0.getNode()) { + CombineTo(N0.getNode(), NarrowLoad); + // CombineTo deleted the truncate, if needed, but not what's under it. + AddToWorklist(oye); + } + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (truncate x)) + if (N0.getOpcode() == ISD::TRUNCATE) { + SDValue TruncOp = N0.getOperand(0); + if (TruncOp.getValueType() == VT) + return TruncOp; // x iff x size == zext size. 
+ if (TruncOp.getValueType().bitsGT(VT)) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); + } + + // Fold (aext (and (trunc x), cst)) -> (and x, cst) + // if the trunc is not free. + if (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::TRUNCATE && + N0.getOperand(1).getOpcode() == ISD::Constant && + !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), + N0.getValueType())) { + SDValue X = N0.getOperand(0).getOperand(0); + if (X.getValueType().bitsLT(VT)) { + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); + } else if (X.getValueType().bitsGT(VT)) { + X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); + } + APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + Mask = Mask.zext(VT.getSizeInBits()); + SDLoc DL(N); + return DAG.getNode(ISD::AND, DL, VT, + X, DAG.getConstant(Mask, DL, VT)); + } + + // fold (aext (load x)) -> (aext (truncate (extload x))) + // None of the supported targets knows how to perform load and any_ext + // on vectors in one instruction. We only perform this transformation on + // scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && + ISD::isUNINDEXEDLoad(N0.getNode()) && + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + bool DoXform = true; + SmallVector<SDNode*, 4> SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), + ISD::ANY_EXTEND); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + // fold (aext (zextload x)) -> (aext (truncate (zextload x))) + // fold (aext (sextload x)) -> (aext (truncate (sextload x))) + // fold (aext ( extload x)) -> (aext (truncate (extload x))) + if (N0.getOpcode() == ISD::LOAD && + !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse()) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + ISD::LoadExtType ExtType = LN0->getExtensionType(); + EVT MemVT = LN0->getMemoryVT(); + if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) { + SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N), + VT, LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), + N0.getValueType(), ExtLoad), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + + if (N0.getOpcode() == ISD::SETCC) { + // For vectors: + // aext(setcc) -> vsetcc + // aext(setcc) -> truncate(vsetcc) + // aext(setcc) -> aext(vsetcc) + // Only do this before legalize for now. + if (VT.isVector() && !LegalOperations) { + EVT N0VT = N0.getOperand(0).getValueType(); + // We know that the # elements of the results is the same as the + // # elements of the compare (and the # elements of the compare result + // for that matter). Check to see that they are the same size. If so, + // we know that the element size of the sext'd result matches the + // element size of the compare operands. 
+ if (VT.getSizeInBits() == N0VT.getSizeInBits()) + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source + // elements we can use a matching integer vector type and then + // truncate/any extend + else { + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); + SDValue VsetCC = + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT); + } + } + + // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc + SDLoc DL(N); + SDValue SCC = + SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), + DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT), + cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); + if (SCC.getNode()) + return SCC; + } + + return SDValue(); +} + +/// See if the specified operand can be simplified with the knowledge that only +/// the bits specified by Mask are used. If so, return the simpler operand, +/// otherwise return a null SDValue. +SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { + switch (V.getOpcode()) { + default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) + return DAG.getConstant(NewVal, SDLoc(V), V.getValueType()); + break; + } + case ISD::OR: + case ISD::XOR: + // If the LHS or RHS don't contribute bits to the or, drop them. + if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) + return V.getOperand(1); + if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) + return V.getOperand(0); + break; + case ISD::SRL: + // Only look at single-use SRLs. + if (!V.getNode()->hasOneUse()) + break; + if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) { + // See if we can recursively simplify the LHS. + unsigned Amt = RHSC->getZExtValue(); + + // Watch out for shift count overflow though. + if (Amt >= Mask.getBitWidth()) break; + APInt NewMask = Mask << Amt; + if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask)) + return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), + SimplifyLHS, V.getOperand(1)); + } + } + return SDValue(); +} + +/// If the result of a wider load is shifted to right of N bits and then +/// truncated to a narrower type and where N is a multiple of number of bits of +/// the narrower type, transform it to a narrower load from address + N / num of +/// bits of new type. If the result is to be extended, also fold the extension +/// to form a extending load. +SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { + unsigned Opc = N->getOpcode(); + + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT ExtVT = VT; + + // This transformation isn't valid for vector loads. + if (VT.isVector()) + return SDValue(); + + // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then + // extended to VT. + if (Opc == ISD::SIGN_EXTEND_INREG) { + ExtType = ISD::SEXTLOAD; + ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + } else if (Opc == ISD::SRL) { + // Another special-case: SRL is basically zero-extending a narrower value. 
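+  // e.g. (i32 (srl (i32 load x), 24)) only uses the top byte of the loaded
+  // value, so it can be rewritten as a zero-extending i8 load of that byte.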
+ ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); + } + if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT)) + return SDValue(); + + unsigned EVTBits = ExtVT.getSizeInBits(); + + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (!ExtVT.isRound()) + return SDValue(); + + unsigned ShAmt = 0; + if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShAmt = N01->getZExtValue(); + // Is the shift amount a multiple of size of VT? + if ((ShAmt & (EVTBits-1)) == 0) { + N0 = N0.getOperand(0); + // Is the load width a multiple of size of VT? + if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) + return SDValue(); + } + + // At this point, we must have a load or else we can't do the transform. + if (!isa<LoadSDNode>(N0)) return SDValue(); + + // Because a SRL must be assumed to *need* to zero-extend the high bits + // (as opposed to anyext the high bits), we can't combine the zextload + // lowering of SRL and an sextload. + if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD) + return SDValue(); + + // If the shift amount is larger than the input type then we're not + // accessing any of the loaded bytes. If the load was a zextload/extload + // then the result of the shift+trunc is zero/undef (handled elsewhere). + if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) + return SDValue(); + } + } + + // If the load is shifted left (and the result isn't shifted back right), + // we can fold the truncate through the shift. + unsigned ShLeftAmt = 0; + if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && + ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { + if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + ShLeftAmt = N01->getZExtValue(); + N0 = N0.getOperand(0); + } + } + + // If we haven't found a load, we can't narrow it. Don't transform one with + // multiple uses, this would require adding a new load. + if (!isa<LoadSDNode>(N0) || !N0.hasOneUse()) + return SDValue(); + + // Don't change the width of a volatile load. + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (LN0->isVolatile()) + return SDValue(); + + // Verify that we are actually reducing a load width here. + if (LN0->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. + if (LN0->getNumValues() > 2) + return SDValue(); + + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. 
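+ // For example (illustrative), if the original load is a zextload with an i8
+ // memory type and we want the i16 starting at bit 8, bits [8,24) come from
+ // the extension rather than from memory, so a narrower load cannot supply
+ // them.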
+ if (LN0->getExtensionType() != ISD::NON_EXTLOAD && + LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt) + return SDValue(); + + if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT)) + return SDValue(); + + EVT PtrType = N0.getOperand(1).getValueType(); + + if (PtrType == MVT::Untyped || PtrType.isExtended()) + // It's not possible to generate a constant of extended or untyped type. + return SDValue(); + + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. + if (DAG.getDataLayout().isBigEndian()) { + unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); + unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); + ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; + } + + uint64_t PtrOff = ShAmt / 8; + unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); + SDLoc DL(LN0); + // The original load itself didn't wrap, so an offset within it doesn't. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + SDValue NewPtr = DAG.getNode(ISD::ADD, DL, + PtrType, LN0->getBasePtr(), + DAG.getConstant(PtrOff, DL, PtrType), + &Flags); + AddToWorklist(NewPtr.getNode()); + + SDValue Load; + if (ExtType == ISD::NON_EXTLOAD) + Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + else + Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr, + LN0->getPointerInfo().getWithOffset(PtrOff), + ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign, LN0->getAAInfo()); + + // Replace the old load's chain with the new load's chain. + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); + + // Shift the result left, if we've swallowed a left shift. + SDValue Result = Load; + if (ShLeftAmt != 0) { + EVT ShImmTy = getShiftAmountTy(Result.getValueType()); + if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) + ShImmTy = VT; + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. + SDLoc DL(N0); + if (ShLeftAmt >= VT.getSizeInBits()) + Result = DAG.getConstant(0, DL, VT); + else + Result = DAG.getNode(ISD::SHL, DL, VT, + Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy)); + } + + // Return the new loaded value. + return Result; +} + +SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N1)->getVT(); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned EVTBits = EVT.getScalarType().getSizeInBits(); + + if (N0.isUndef()) + return DAG.getUNDEF(VT); + + // fold (sext_in_reg c1) -> c1 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1); + + // If the input is already sign extended, just drop the extension. 
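+ // For example, a sext_in_reg of i8 within an i32 is a no-op when the input
+ // already has at least 32 - 8 + 1 = 25 known sign bits.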
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1) + return N0; + + // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2 + if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, + N0.getOperand(0), N1); + + // fold (sext_in_reg (sext x)) -> (sext x) + // fold (sext_in_reg (aext x)) -> (sext x) + // if x is small enough. + if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits && + (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT))) + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1); + } + + // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero. + if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits))) + return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT); + + // fold operands of sext_in_reg based on knowledge that the top bits are not + // demanded. + if (SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + // fold (sext_in_reg (load x)) -> (smaller sextload x) + // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) + if (SDValue NarrowLoad = ReduceLoadWidth(N)) + return NarrowLoad; + + // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) + // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. + // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. + if (N0.getOpcode() == ISD::SRL) { + if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) + if (ShAmt->getZExtValue()+EVTBits <= VTBits) { + // We can turn this into an SRA iff the input to the SRL is already sign + // extended enough. + unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); + if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) + return DAG.getNode(ISD::SRA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1)); + } + } + + // fold (sext_inreg (extload x)) -> (sextload x) + if (ISD::isEXTLoad(N0.getNode()) && + ISD::isUNINDEXEDLoad(N0.getNode()) && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorklist(ExtLoad.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use + if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && + N0.hasOneUse() && + EVT == cast<LoadSDNode>(N0)->getMemoryVT() && + ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || + TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + + // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) + if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { + SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), + N0.getOperand(1), false); + if (BSwap.getNode()) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, + BSwap, N1); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes, + LegalOperations)) + return SDValue(Res, 0); + + return SDValue(); +} + +SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + bool isLE = DAG.getDataLayout().isLittleEndian(); + + // noop truncate + if (N0.getValueType() == N->getValueType(0)) + return N0; + // fold (truncate c1) -> c1 + if (isConstantIntBuildVectorOrConstantInt(N0)) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); + // fold (truncate (truncate x)) -> (truncate x) + if (N0.getOpcode() == ISD::TRUNCATE) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + // fold (truncate (ext x)) -> (ext x) or (truncate x) or x + if (N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::SIGN_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND) { + if (N0.getOperand(0).getValueType().bitsLT(VT)) + // if the source is smaller than the dest, we still need an extend + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, + N0.getOperand(0)); + if (N0.getOperand(0).getValueType().bitsGT(VT)) + // if the source is larger than the dest, than we just need the truncate + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); + // if the source and dest are the same type, we can drop both the extend + // and the truncate. + return N0.getOperand(0); + } + + // Fold extract-and-trunc into a narrow extract. For example: + // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) + // i32 y = TRUNCATE(i64 x) + // -- becomes -- + // v16i8 b = BITCAST (v2i64 val) + // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) + // + // Note: We only run this optimization after type legalization (which often + // creates this pattern) and before operation legalization after which + // we need to be more careful about the vector instructions that we generate. + if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { + + EVT VecTy = N0.getOperand(0).getValueType(); + EVT ExTy = N0.getValueType(); + EVT TrTy = N->getValueType(0); + + unsigned NumElem = VecTy.getVectorNumElements(); + unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); + assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); + + SDValue EltNo = N0->getOperand(1); + if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); + + SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), + NVT, N0.getOperand(0)); + + SDLoc DL(N); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, + DL, TrTy, V, + DAG.getConstant(Index, DL, IndexTy)); + } + } + + // trunc (select c, a, b) -> select c, (trunc a), (trunc b) + if (N0.getOpcode() == ISD::SELECT) { + EVT SrcVT = N0.getValueType(); + if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) && + TLI.isTruncateFree(SrcVT, VT)) { + SDLoc SL(N0); + SDValue Cond = N0.getOperand(0); + SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1)); + SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2)); + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1); + } + } + + // Fold a series of buildvector, bitcast, and truncate if possible. + // For example fold + // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to + // (2xi32 (buildvector x, y)). + if (Level == AfterLegalizeVectorOps && VT.isVector() && + N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N0.getOperand(0).hasOneUse()) { + + SDValue BuildVect = N0.getOperand(0); + EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); + EVT TruncVecEltTy = VT.getVectorElementType(); + + // Check that the element types match. + if (BuildVectEltTy == TruncVecEltTy) { + // Now we only need to compute the offset of the truncated elements. + unsigned BuildVecNumElts = BuildVect.getNumOperands(); + unsigned TruncVecNumElts = VT.getVectorNumElements(); + unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; + + assert((BuildVecNumElts % TruncVecNumElts) == 0 && + "Invalid number of elements"); + + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) + Opnds.push_back(BuildVect.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + } + } + + // See if we can simplify the input to this truncate through knowledge that + // only the low bits are being used. + // For example "trunc (or (shl x, 8), y)" // -> trunc y + // Currently we only perform this optimization on scalars because vectors + // may have different active low bits. + if (!VT.isVector()) { + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); + } + // fold (truncate (load x)) -> (smaller load x) + // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + if (SDValue Reduced = ReduceLoadWidth(N)) + return Reduced; + + // Handle the case where the load remains an extending load even + // after truncation. + if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + if (!LN0->isVolatile() && + LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { + SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), + VT, LN0->getChain(), LN0->getBasePtr(), + LN0->getMemoryVT(), + LN0->getMemOperand()); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); + return NewLoad; + } + } + } + // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), + // where ... are all 'undef'. 
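+ // For example (illustrative), with X of type v2i32:
+ //   (v4i16 (trunc (v4i32 (concat_vectors undef, X))))
+ //   -> (v4i16 (concat_vectors (v2i16 undef), (v2i16 (trunc X))))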
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { + SmallVector<EVT, 8> VTs; + SDValue V; + unsigned Idx = 0; + unsigned NumDefs = 0; + + for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { + SDValue X = N0.getOperand(i); + if (X.getOpcode() != ISD::UNDEF) { + V = X; + Idx = i; + NumDefs++; + } + // Stop if more than one members are non-undef. + if (NumDefs > 1) + break; + VTs.push_back(EVT::getVectorVT(*DAG.getContext(), + VT.getVectorElementType(), + X.getValueType().getVectorNumElements())); + } + + if (NumDefs == 0) + return DAG.getUNDEF(VT); + + if (NumDefs == 1) { + assert(V.getNode() && "The single defined operand is empty!"); + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (i != Idx) { + Opnds.push_back(DAG.getUNDEF(VTs[i])); + continue; + } + SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); + AddToWorklist(NV.getNode()); + Opnds.push_back(NV); + } + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); + } + } + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + + return SDValue(); +} + +static SDNode *getBuildPairElt(SDNode *N, unsigned i) { + SDValue Elt = N->getOperand(i); + if (Elt.getOpcode() != ISD::MERGE_VALUES) + return Elt.getNode(); + return Elt.getOperand(Elt.getResNo()).getNode(); +} + +/// build_pair (load, load) -> load +/// if load locations are consecutive. +SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { + assert(N->getOpcode() == ISD::BUILD_PAIR); + + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || + LD1->getAddressSpace() != LD2->getAddressSpace()) + return SDValue(); + EVT LD1VT = LD1->getValueType(0); + + if (ISD::isNON_EXTLoad(LD2) && + LD2->hasOneUse() && + // If both are volatile this would reduce the number of volatile loads. + // If one is volatile it might be ok, but play conservative and bail out. + !LD1->isVolatile() && + !LD2->isVolatile() && + DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { + unsigned Align = LD1->getAlignment(); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign <= Align && + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), + LD1->getBasePtr(), LD1->getPointerInfo(), + false, false, false, Align); + } + + return SDValue(); +} + +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 1 : 0; +} + +SDValue DAGCombiner::visitBITCAST(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // If the input is a BUILD_VECTOR with all constant elements, fold this now. + // Only do this before legalize, since afterward the target may be depending + // on the bitconvert. + // First check to see if this is all constant. 
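+ // For example, (v2f32 (bitcast (v2i32 build_vector 0x3F800000, 0x40000000)))
+ // becomes, in effect, (build_vector (f32 1.0), (f32 2.0)).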
+ if (!LegalTypes &&
+ N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
+ VT.isVector()) {
+ bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
+
+ EVT DestEltVT = N->getValueType(0).getVectorElementType();
+ assert(!DestEltVT.isVector() &&
+ "Element type of vector ValueType must not be vector!");
+ if (isSimple)
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ }
+
+ // If the input is a constant, let getNode fold it.
+ if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
+ // If we can't allow illegal operations, we need to check that this is just
+ // an fp -> int or int -> fp conversion and that the resulting operation will
+ // be legal.
+ if (!LegalOperations ||
+ (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
+ (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
+ TLI.isOperationLegal(ISD::Constant, VT)))
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
+ }
+
+ // (conv (conv x, t1), t2) -> (conv x, t2)
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
+ N0.getOperand(0));
+
+ // fold (conv (load x)) -> (load (conv*)x)
+ // If the resultant load doesn't need a higher alignment than the original!
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
+ // Do not change the width of a volatile load.
+ !cast<LoadSDNode>(N0)->isVolatile() &&
+ // Do not remove the cast if the types differ in endian layout.
+ TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
+ TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
+ TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ unsigned Align = DAG.getDataLayout().getABITypeAlignment(
+ VT.getTypeForEVT(*DAG.getContext()));
+ unsigned OrigAlign = LN0->getAlignment();
+
+ if (Align <= OrigAlign) {
+ SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ LN0->isVolatile(), LN0->isNonTemporal(),
+ LN0->isInvariant(), OrigAlign,
+ LN0->getAAInfo());
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
+ return Load;
+ }
+ }
+
+ // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
+ // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+ //
+ // For ppc_fp128:
+ // fold (bitcast (fneg x)) ->
+ // flipbit = signbit
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // fold (bitcast (fabs x)) ->
+ // flipbit = (and (extract_element (bitcast x), 0), signbit)
+ // (xor (bitcast x) (build_pair flipbit, flipbit))
+ // This often reduces constant pool loads.
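+ // For example, with a scalar f32 x and an i32 result:
+ //   (bitcast (fneg x)) -> (xor (bitcast x), 0x80000000)
+ //   (bitcast (fabs x)) -> (and (bitcast x), 0x7FFFFFFF)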
+ if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && + N0.getNode()->hasOneUse() && VT.isInteger() && + !VT.isVector() && !N0.getValueType().isVector()) { + SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, + N0.getOperand(0)); + AddToWorklist(NewConv.getNode()); + + SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + if (N0.getOpcode() == ISD::FNEG) + return DAG.getNode(ISD::XOR, DL, VT, + NewConv, DAG.getConstant(SignBit, DL, VT)); + assert(N0.getOpcode() == ISD::FABS); + return DAG.getNode(ISD::AND, DL, VT, + NewConv, DAG.getConstant(~SignBit, DL, VT)); + } + + // fold (bitconvert (fcopysign cst, x)) -> + // (or (and (bitconvert x), sign), (and cst, (not sign))) + // Note that we don't handle (copysign x, cst) because this can always be + // folded to an fneg or fabs. + // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && + isa<ConstantFPSDNode>(N0.getOperand(0)) && + VT.isInteger() && !VT.isVector()) { + unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); + EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); + if (isTypeLegal(IntXVT)) { + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), + IntXVT, N0.getOperand(1)); + AddToWorklist(X.getNode()); + + // If X has a different width than the result/lhs, sext it or truncate it. + unsigned VTWidth = VT.getSizeInBits(); + if (OrigXWidth < VTWidth) { + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); + AddToWorklist(X.getNode()); + } else if (OrigXWidth > VTWidth) { + // To get the sign bit in the right place, we have to shift it right + // before truncating. 
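+ // For example, if the sign source was bitcast from an f64 (OrigXWidth = 64)
+ // and the result VT is i32, shift right by 64 - 32 = 32 so that sign bit 63
+ // ends up in bit 31 of the truncated value.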
+ SDLoc DL(X); + X = DAG.getNode(ISD::SRL, DL, + X.getValueType(), X, + DAG.getConstant(OrigXWidth-VTWidth, DL, + X.getValueType())); + AddToWorklist(X.getNode()); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); + AddToWorklist(X.getNode()); + } + + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } + APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); + X = DAG.getNode(ISD::AND, SDLoc(X), VT, + X, DAG.getConstant(SignBit, SDLoc(X), VT)); + AddToWorklist(X.getNode()); + + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), + VT, N0.getOperand(0)); + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, + Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT)); + AddToWorklist(Cst.getNode()); + + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); + } + } + + // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. + if (N0.getOpcode() == ISD::BUILD_PAIR) + if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT)) + return CombineLD; + + // Remove double bitcasts from shuffles - this is often a legacy of + // XformToShuffleWithZero being used to combine bitmaskings (of + // float vectors bitcast to integer vectors) into shuffles. + // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1) + if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() && + N0->getOpcode() == ISD::VECTOR_SHUFFLE && + VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() && + !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) { + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0); + + // If operands are a bitcast, peek through if it casts the original VT. + // If operands are a constant, just bitcast back to original VT. + auto PeekThroughBitcast = [&](SDValue Op) { + if (Op.getOpcode() == ISD::BITCAST && + Op.getOperand(0).getValueType() == VT) + return SDValue(Op.getOperand(0)); + if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); + return SDValue(); + }; + + SDValue SV0 = PeekThroughBitcast(N0->getOperand(0)); + SDValue SV1 = PeekThroughBitcast(N0->getOperand(1)); + if (!(SV0 && SV1)) + return SDValue(); + + int MaskScale = + VT.getVectorNumElements() / N0.getValueType().getVectorNumElements(); + SmallVector<int, 8> NewMask; + for (int M : SVN->getMask()) + for (int i = 0; i != MaskScale; ++i) + NewMask.push_back(M < 0 ? 
-1 : M * MaskScale + i); + + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, VT); + } + + if (LegalMask) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { + EVT VT = N->getValueType(0); + return CombineConsecutiveLoads(N, VT); +} + +/// We know that BV is a build_vector node with Constant, ConstantFP or Undef +/// operands. DstEltVT indicates the destination element value type. +SDValue DAGCombiner:: +ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { + EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); + + // If this is already the right type, we're done. + if (SrcEltVT == DstEltVT) return SDValue(BV, 0); + + unsigned SrcBitSize = SrcEltVT.getSizeInBits(); + unsigned DstBitSize = DstEltVT.getSizeInBits(); + + // If this is a conversion of N elements of one type to N elements of another + // type, convert each element. This handles FP<->INT cases. + if (SrcBitSize == DstBitSize) { + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + BV->getValueType(0).getVectorNumElements()); + + // Due to the FP element handling below calling this routine recursively, + // we can end up with a scalar-to-vector node here. + if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, + DAG.getNode(ISD::BITCAST, SDLoc(BV), + DstEltVT, BV->getOperand(0))); + + SmallVector<SDValue, 8> Ops; + for (SDValue Op : BV->op_values()) { + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated. Make that explicit here. + if (Op.getValueType() != SrcEltVT) + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), + DstEltVT, Op)); + AddToWorklist(Ops.back().getNode()); + } + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops); + } + + // Otherwise, we're growing or shrinking the elements. To avoid having to + // handle annoying details of growing/shrinking FP values, we convert them to + // int first. + if (SrcEltVT.isFloatingPoint()) { + // Convert the input float vector to a int vector where the elements are the + // same sizes. + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); + BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); + SrcEltVT = IntVT; + } + + // Now we know the input is an integer vector. If the output is a FP type, + // convert to integer first, then to FP of the right size. + if (DstEltVT.isFloatingPoint()) { + EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); + SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); + + // Next, convert to FP elements of the same size. + return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); + } + + SDLoc DL(BV); + + // Okay, we know the src/dst types are both integers of differing types. + // Handling growing first. 
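+ // For example, growing v4i16 constants into v2i32: each i32 output packs
+ // 32/16 = 2 adjacent i16 inputs, with the first input of each pair landing
+ // in the low half on little-endian targets and in the high half on
+ // big-endian targets.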
+ assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); + if (SrcBitSize < DstBitSize) { + unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0, e = BV->getNumOperands(); i != e; + i += NumInputsPerOutput) { + bool isLE = DAG.getDataLayout().isLittleEndian(); + APInt NewBits = APInt(DstBitSize, 0); + bool EltIsUndef = true; + for (unsigned j = 0; j != NumInputsPerOutput; ++j) { + // Shift the previously computed bits over. + NewBits <<= SrcBitSize; + SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); + if (Op.getOpcode() == ISD::UNDEF) continue; + EltIsUndef = false; + + NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). + zextOrTrunc(SrcBitSize).zext(DstBitSize); + } + + if (EltIsUndef) + Ops.push_back(DAG.getUNDEF(DstEltVT)); + else + Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT)); + } + + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + + // Finally, this must be the case where we are shrinking elements: each input + // turns into multiple outputs. + unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; + EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, + NumOutputsPerInput*BV->getNumOperands()); + SmallVector<SDValue, 8> Ops; + + for (const SDValue &Op : BV->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) { + Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT)); + continue; + } + + APInt OpVal = cast<ConstantSDNode>(Op)-> + getAPIntValue().zextOrTrunc(SrcBitSize); + + for (unsigned j = 0; j != NumOutputsPerInput; ++j) { + APInt ThisVal = OpVal.trunc(DstBitSize); + Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT)); + OpVal = OpVal.lshr(DstBitSize); + } + + // For big endian targets, swap the order of the pieces of each element. + if (DAG.getDataLayout().isBigEndian()) + std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); +} + +/// Try to perform FMA combining on a given FADD node. +SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)), + // prefer to fold the multiply with fewer uses. 
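+ // For example, if (fmul u, v) also feeds other nodes but (fmul x, y) is only
+ // used by this fadd, fuse (fmul x, y) so the shared multiply stays available
+ // to its other users.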
+ if (Aggressive && N0.getOpcode() == ISD::FMUL && + N1.getOpcode() == ISD::FMUL) { + if (N0.getNode()->use_size() > N1.getNode()->use_size()) + std::swap(N0, N1); + } + + // fold (fadd (fmul x, y), z) -> (fma x, y, z) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + + // fold (fadd x, (fmul y, z)) -> (fma y, z, x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), N0); + } + + // Look through FP_EXTEND nodes to do more combining. + if (AllowFusion && LookThroughFPExt) { + // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), N1); + } + + // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x) + // Note: Commutes FADD operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), N0); + } + } + + // More folding opportunities when target permits. + if ((AllowFusion || HasFMAD) && Aggressive) { + // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z)) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + N1)); + } + + // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x)) + if (N1->getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(0), N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N1.getOperand(2).getOperand(0), + N1.getOperand(2).getOperand(1), + N0)); + } + + if (AllowFusion && LookThroughFPExt) { + // fold (fadd (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y, (fma (fpext u), (fpext v), z)) + auto FoldFAddFMAFPExtFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1), + N020.getOperand(0), N020.getOperand(1), + N1); + } + } + + // fold (fadd (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ auto FoldFAddFPExtFMAFMul = [&] ( + SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, X), + DAG.getNode(ISD::FP_EXTEND, SL, VT, Y), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, U), + DAG.getNode(ISD::FP_EXTEND, SL, VT, V), + Z)); + }; + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1), + N002.getOperand(0), N002.getOperand(1), + N1); + } + } + + // fold (fadd x, (fma y, z, (fpext (fmul u, v))) + // -> (fma y, z, (fma (fpext u), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode) { + SDValue N12 = N1.getOperand(2); + if (N12.getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N12.getOperand(0); + if (N120.getOpcode() == ISD::FMUL) + return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1), + N120.getOperand(0), N120.getOperand(1), + N0); + } + } + + // fold (fadd x, (fpext (fma y, z, (fmul u, v))) + // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == PreferredFusedOpcode) { + SDValue N102 = N10.getOperand(2); + if (N102.getOpcode() == ISD::FMUL) + return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1), + N102.getOperand(0), N102.getOperand(1), + N0); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FSUB node. +SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + bool LookThroughFPExt = TLI.isFPExtFree(VT); + + // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) + if (N0.getOpcode() == ISD::FMUL && + (Aggressive || N0->hasOneUse())) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) + // Note: Commutes FSUB operands. 
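+ // For example, x - y*z is rewritten as (-y)*z + x, which computes the same
+ // value with the multiply fused.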
+ if (N1.getOpcode() == ISD::FMUL && + (Aggressive || N1->hasOneUse())) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), N0); + + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && + N0.getOperand(0).getOpcode() == ISD::FMUL && + (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) { + SDValue N00 = N0.getOperand(0).getOperand(0); + SDValue N01 = N0.getOperand(0).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N00), N01, + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // Look through FP_EXTEND nodes to do more combining. + if (AllowFusion && LookThroughFPExt) { + // fold (fsub (fpext (fmul x, y)), z) + // -> (fma (fpext x), (fpext y), (fneg z)) + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, N1)); + } + + // fold (fsub x, (fpext (fmul y, z))) + // -> (fma (fneg (fpext y)), (fpext z), x) + // Note: Commutes FSUB operands. + if (N1.getOpcode() == ISD::FP_EXTEND) { + SDValue N10 = N1.getOperand(0); + if (N10.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(0))), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N10.getOperand(1)), + N0); + } + + // fold (fsub (fpext (fneg (fmul, x, y))), z) + // -> (fneg (fma (fpext x), (fpext y), z)) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FNEG) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + // fold (fsub (fneg (fpext (fmul, x, y))), z) + // -> (fneg (fma (fpext x)), (fpext y), z) + // Note: This could be removed with appropriate canonicalization of the + // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the + // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent + // from implementing the canonicalization in visitFSUB. + if (N0.getOpcode() == ISD::FNEG) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::FP_EXTEND) { + SDValue N000 = N00.getOperand(0); + if (N000.getOpcode() == ISD::FMUL) { + return DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N000.getOperand(1)), + N1)); + } + } + } + + } + + // More folding opportunities when target permits. 
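+ // For example, the first fold below re-associates (x*y + u*v) - z as
+ // x*y + (u*v + (-z)) so that both multiplies can be fused.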
+ if ((AllowFusion || HasFMAD) && Aggressive) { + // fold (fsub (fma x, y, (fmul u, v)), z) + // -> (fma x, y (fma u, v, (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode && + N0.getOperand(2).getOpcode() == ISD::FMUL) { + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(2).getOperand(0), + N0.getOperand(2).getOperand(1), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + + // fold (fsub x, (fma y, z, (fmul u, v))) + // -> (fma (fneg y), z, (fma (fneg u), v, x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FMUL) { + SDValue N20 = N1.getOperand(2).getOperand(0); + SDValue N21 = N1.getOperand(2).getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N20), + + N21, N0)); + } + + if (AllowFusion && LookThroughFPExt) { + // fold (fsub (fma x, y, (fpext (fmul u, v))), z) + // -> (fma x, y (fma (fpext u), (fpext v), (fneg z))) + if (N0.getOpcode() == PreferredFusedOpcode) { + SDValue N02 = N0.getOperand(2); + if (N02.getOpcode() == ISD::FP_EXTEND) { + SDValue N020 = N02.getOperand(0); + if (N020.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + N0.getOperand(0), N0.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N020.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub (fpext (fma x, y, (fmul u, v))), z) + // -> (fma (fpext x), (fpext y), + // (fma (fpext u), (fpext v), (fneg z))) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. 
+ if (N0.getOpcode() == ISD::FP_EXTEND) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == PreferredFusedOpcode) { + SDValue N002 = N00.getOperand(2); + if (N002.getOpcode() == ISD::FMUL) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N00.getOperand(1)), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(0)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N002.getOperand(1)), + DAG.getNode(ISD::FNEG, SL, VT, + N1))); + } + } + + // fold (fsub x, (fma y, z, (fpext (fmul u, v)))) + // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x)) + if (N1.getOpcode() == PreferredFusedOpcode && + N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) { + SDValue N120 = N1.getOperand(2).getOperand(0); + if (N120.getOpcode() == ISD::FMUL) { + SDValue N1200 = N120.getOperand(0); + SDValue N1201 = N120.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)), + N1.getOperand(1), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1200)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1201), + N0)); + } + } + + // fold (fsub x, (fpext (fma y, z, (fmul u, v)))) + // -> (fma (fneg (fpext y)), (fpext z), + // (fma (fneg (fpext u)), (fpext v), x)) + // FIXME: This turns two single-precision and one double-precision + // operation into two double-precision operations, which might not be + // interesting for all targets, especially GPUs. + if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == PreferredFusedOpcode) { + SDValue N100 = N1.getOperand(0).getOperand(0); + SDValue N101 = N1.getOperand(0).getOperand(1); + SDValue N102 = N1.getOperand(0).getOperand(2); + if (N102.getOpcode() == ISD::FMUL) { + SDValue N1020 = N102.getOperand(0); + SDValue N1021 = N102.getOperand(1); + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N100)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, N101), + DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, + DAG.getNode(ISD::FP_EXTEND, SL, + VT, N1020)), + DAG.getNode(ISD::FP_EXTEND, SL, VT, + N1021), + N0)); + } + } + } + } + + return SDValue(); +} + +/// Try to perform FMA combining on a given FMUL node. +SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + SDLoc SL(N); + + assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation"); + + const TargetOptions &Options = DAG.getTarget().Options; + bool AllowFusion = + (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); + + // Floating-point multiply-add with intermediate rounding. + bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + + // Floating-point multiply-add without intermediate rounding. + bool HasFMA = + AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT)); + + // No valid opcode, do not combine. + if (!HasFMAD && !HasFMA) + return SDValue(); + + // Always prefer FMAD to FMA for precision. + unsigned PreferredFusedOpcode = HasFMAD ? 
ISD::FMAD : ISD::FMA; + bool Aggressive = TLI.enableAggressiveFMAFusion(VT); + + // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y) + // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y)) + auto FuseFADD = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFADD(N0, N1)) + return FMA; + if (SDValue FMA = FuseFADD(N1, N0)) + return FMA; + + // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y) + // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y)) + // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y)) + // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y) + auto FuseFSUB = [&](SDValue X, SDValue Y) { + if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { + auto XC0 = isConstOrConstSplatFP(X.getOperand(0)); + if (XC0 && XC0->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + Y); + if (XC0 && XC0->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, + DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + + auto XC1 = isConstOrConstSplatFP(X.getOperand(1)); + if (XC1 && XC1->isExactlyValue(+1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, + DAG.getNode(ISD::FNEG, SL, VT, Y)); + if (XC1 && XC1->isExactlyValue(-1.0)) + return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); + } + return SDValue(); + }; + + if (SDValue FMA = FuseFSUB(N0, N1)) + return FMA; + if (SDValue FMA = FuseFSUB(N1, N0)) + return FMA; + + return SDValue(); +} + +SDValue DAGCombiner::visitFADD(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0); + bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fadd c1, c2) -> c1 + c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags); + + // canonicalize constant to RHS + if (N0CFP && !N1CFP) + return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags); + + // fold (fadd A, (fneg B)) -> (fsub A, B) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2) + return DAG.getNode(ISD::FSUB, DL, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations), Flags); + + // fold (fadd (fneg A), B) -> (fsub B, A) + if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && + isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2) + return DAG.getNode(ISD::FSUB, DL, VT, N1, + GetNegatedExpression(N0, DAG, LegalOperations), Flags); + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // No FP constant should be created after legalization as Instruction + // Selection pass has a hard time dealing with FP constants. 
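+ // For example, the (fadd (fadd x, x), x) -> (fmul x, 3.0) fold below has to
+ // materialize the constant 3.0, so it is only attempted while new constants
+ // are still allowed.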
+ bool AllowNewConst = (Level < AfterLegalizeDAG); + + // fold (fadd A, 0) -> A + if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1)) + if (N1C->isZero()) + return N0; + + // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) + if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) + return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, + Flags), + Flags); + + // If allowed, fold (fadd (fneg x), x) -> 0.0 + if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) + return DAG.getConstantFP(0.0, DL, VT); + + // If allowed, fold (fadd x, (fneg x)) -> 0.0 + if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) + return DAG.getConstantFP(0.0, DL, VT); + + // We can fold chains of FADD's of the same value into multiplications. + // This transform is not safe in general because we are reducing the number + // of rounding steps. + if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) { + if (N0.getOpcode() == ISD::FMUL) { + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)); + + // (fadd (fmul x, c), x) -> (fmul x, c+1) + if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags); + } + + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) + if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags); + } + } + + if (N1.getOpcode() == ISD::FMUL) { + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1)); + + // (fadd x, (fmul x, c)) -> (fmul x, c+1) + if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags); + } + + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1), + DAG.getConstantFP(2.0, DL, VT), Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags); + } + } + + if (N0.getOpcode() == ISD::FADD && AllowNewConst) { + bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul x, 3.0) + if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, DL, VT, + N1, DAG.getConstantFP(3.0, DL, VT), Flags); + } + } + + if (N1.getOpcode() == ISD::FADD && AllowNewConst) { + bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, DL, VT, + N0, DAG.getConstantFP(3.0, DL, VT), Flags); + } + } + + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) + if (AllowNewConst && + 
N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N1.getOperand(1) && + N0.getOperand(0) == N1.getOperand(0)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), + DAG.getConstantFP(4.0, DL, VT), Flags); + } + } + } // enable-unsafe-fp-math + + // FADD -> FMA combines: + if (SDValue Fused = visitFADDForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFSUB(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + EVT VT = N->getValueType(0); + SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fsub c1, c2) -> c1-c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags); + + // fold (fsub A, (fneg B)) -> (fadd A, B) + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return DAG.getNode(ISD::FADD, dl, VT, N0, + GetNegatedExpression(N1, DAG, LegalOperations), Flags); + + // If 'unsafe math' is enabled, fold lots of things. + if (Options.UnsafeFPMath) { + // (fsub A, 0) -> A + if (N1CFP && N1CFP->isZero()) + return N0; + + // (fsub 0, B) -> -B + if (N0CFP && N0CFP->isZero()) { + if (isNegatibleForFree(N1, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N1, DAG, LegalOperations); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, dl, VT, N1); + } + + // (fsub x, x) -> 0.0 + if (N0 == N1) + return DAG.getConstantFP(0.0f, dl, VT); + + // (fsub x, (fadd x, y)) -> (fneg y) + // (fsub x, (fadd y, x)) -> (fneg y) + if (N1.getOpcode() == ISD::FADD) { + SDValue N10 = N1->getOperand(0); + SDValue N11 = N1->getOperand(1); + + if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N11, DAG, LegalOperations); + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options)) + return GetNegatedExpression(N10, DAG, LegalOperations); + } + } + + // FSUB -> FMA combines: + if (SDValue Fused = visitFSUBForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFMUL(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + + // fold vector ops + if (VT.isVector()) { + // This just handles C1 * C2 for vectors. Other vector folds are below. 
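+    // For example, (fmul (build_vector 2.0, 3.0), (build_vector 4.0, 5.0))
+    // can be folded to a constant vector here.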
+ if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + } + + // fold (fmul c1, c2) -> c1*c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags); + + // canonicalize constant to RHS + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags); + + // fold (fmul A, 1.0) -> A + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return N0; + + if (Options.UnsafeFPMath) { + // fold (fmul A, 0) -> 0 + if (N1CFP && N1CFP->isZero()) + return N1; + + // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) + if (N0.getOpcode() == ISD::FMUL) { + // Fold scalars or any vector constants (not just splats). + // This fold is done in general by InstCombine, but extra fmul insts + // may have been generated during lowering. + SDValue N00 = N0.getOperand(0); + SDValue N01 = N0.getOperand(1); + auto *BV1 = dyn_cast<BuildVectorSDNode>(N1); + auto *BV00 = dyn_cast<BuildVectorSDNode>(N00); + auto *BV01 = dyn_cast<BuildVectorSDNode>(N01); + + // Check 1: Make sure that the first operand of the inner multiply is NOT + // a constant. Otherwise, we may induce infinite looping. + if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) { + // Check 2: Make sure that the second operand of the inner multiply and + // the second operand of the outer multiply are constants. + if ((N1CFP && isConstOrConstSplatFP(N01)) || + (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) { + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags); + } + } + } + + // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) + // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs + // during an early run of DAGCombiner can prevent folding with fmuls + // inserted during lowering. + if (N0.getOpcode() == ISD::FADD && + (N0.getOperand(0) == N0.getOperand(1)) && + N0.hasOneUse()) { + const SDValue Two = DAG.getConstantFP(2.0, DL, VT); + SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags); + return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags); + } + } + + // fold (fmul X, 2.0) -> (fadd X, X) + if (N1CFP && N1CFP->isExactlyValue(+2.0)) + return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags); + + // fold (fmul X, -1.0) -> (fneg X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N0); + + // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. 
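+      // A return value of 2 means the negated form is considered cheaper
+      // (e.g. the operand is already an FNEG), not merely free, so both
+      // operands are only negated when at least one side is a clear win.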
+ if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FMUL, DL, VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); + } + } + + // FMUL -> FMA combines: + if (SDValue Fused = visitFMULForFMACombine(N)) { + AddToWorklist(Fused.getNode()); + return Fused; + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFMA(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + SDLoc dl(N); + const TargetOptions &Options = DAG.getTarget().Options; + + // Constant fold FMA. + if (isa<ConstantFPSDNode>(N0) && + isa<ConstantFPSDNode>(N1) && + isa<ConstantFPSDNode>(N2)) { + return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2); + } + + if (Options.UnsafeFPMath) { + if (N0CFP && N0CFP->isZero()) + return N2; + if (N1CFP && N1CFP->isZero()) + return N2; + } + // TODO: The FMA node should have flags that propagate to these nodes. + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); + if (N1CFP && N1CFP->isExactlyValue(1.0)) + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); + + // Canonicalize (fma c, x, y) -> (fma x, c, y) + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); + + // TODO: FMA nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + + if (Options.UnsafeFPMath) { + // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) + if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1), + &Flags), &Flags); + } + + // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) + if (N0.getOpcode() == ISD::FMUL && + isConstantFPBuildVectorOrConstantFP(N1) && + isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) { + return DAG.getNode(ISD::FMA, dl, VT, + N0.getOperand(0), + DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1), + &Flags), + N2); + } + } + + // (fma x, 1, y) -> (fadd x, y) + // (fma x, -1, y) -> (fadd (fneg x), y) + if (N1CFP) { + if (N1CFP->isExactlyValue(1.0)) + // TODO: The FMA node should have flags that propagate to this node. + return DAG.getNode(ISD::FADD, dl, VT, N0, N2); + + if (N1CFP->isExactlyValue(-1.0) && + (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { + SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); + AddToWorklist(RHSNeg.getNode()); + // TODO: The FMA node should have flags that propagate to this node. 
+ return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); + } + } + + if (Options.UnsafeFPMath) { + // (fma x, c, x) -> (fmul x, (c+1)) + if (N1CFP && N0 == N2) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(1.0, dl, VT), + &Flags), &Flags); + } + + // (fma x, c, (fneg x)) -> (fmul x, (c-1)) + if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, dl, VT, N0, + DAG.getNode(ISD::FADD, dl, VT, + N1, DAG.getConstantFP(-1.0, dl, VT), + &Flags), &Flags); + } + } + + return SDValue(); +} + +// Combine multiple FDIVs with the same divisor into multiple FMULs by the +// reciprocal. +// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip) +// Notice that this is not always beneficial. One reason is different target +// may have different costs for FDIV and FMUL, so sometimes the cost of two +// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason +// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL". +SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { + bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath; + const SDNodeFlags *Flags = N->getFlags(); + if (!UnsafeMath && !Flags->hasAllowReciprocal()) + return SDValue(); + + // Skip if current node is a reciprocal. + SDValue N0 = N->getOperand(0); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + if (N0CFP && N0CFP->isExactlyValue(1.0)) + return SDValue(); + + // Exit early if the target does not want this transform or if there can't + // possibly be enough uses of the divisor to make the transform worthwhile. + SDValue N1 = N->getOperand(1); + unsigned MinUses = TLI.combineRepeatedFPDivisors(); + if (!MinUses || N1->use_size() < MinUses) + return SDValue(); + + // Find all FDIV users of the same divisor. + // Use a set because duplicates may be present in the user list. + SetVector<SDNode *> Users; + for (auto *U : N1->uses()) { + if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) { + // This division is eligible for optimization only if global unsafe math + // is enabled or if this division allows reciprocal formation. + if (UnsafeMath || U->getFlags()->hasAllowReciprocal()) + Users.insert(U); + } + } + + // Now that we have the actual number of divisor uses, make sure it meets + // the minimum threshold specified by the target. + if (Users.size() < MinUses) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags); + + // Dividend / Divisor -> Dividend * Reciprocal + for (auto *U : Users) { + SDValue Dividend = U->getOperand(0); + if (Dividend != FPOne) { + SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend, + Reciprocal, Flags); + CombineTo(U, NewNode); + } else if (U != Reciprocal.getNode()) { + // In the absence of fast-math-flags, this user node is always the + // same node as Reciprocal, but with FMF they may be different nodes. + CombineTo(U, Reciprocal); + } + } + return SDValue(N, 0); // N was replaced. 
+} + +SDValue DAGCombiner::visitFDIV(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + SDLoc DL(N); + const TargetOptions &Options = DAG.getTarget().Options; + SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags; + + // fold vector ops + if (VT.isVector()) + if (SDValue FoldedVOp = SimplifyVBinOp(N)) + return FoldedVOp; + + // fold (fdiv c1, c2) -> c1/c2 + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags); + + if (Options.UnsafeFPMath) { + // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. + if (N1CFP) { + // Compute the reciprocal 1.0 / c2. + APFloat N1APF = N1CFP->getValueAPF(); + APFloat Recip(N1APF.getSemantics(), 1); // 1.0 + APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); + // Only do the transform if the reciprocal is a legal fp immediate that + // isn't too nasty (eg NaN, denormal, ...). + if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty + (!LegalOperations || + // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM + // backend)... we should handle this gracefully after Legalize. + // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || + TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || + TLI.isFPImmLegal(Recip, VT))) + return DAG.getNode(ISD::FMUL, DL, VT, N0, + DAG.getConstantFP(Recip, DL, VT), Flags); + } + + // If this FDIV is part of a reciprocal square root, it may be folded + // into a target-specific square root estimate instruction. + if (N1.getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) { + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } else if (N1.getOpcode() == ISD::FP_EXTEND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { + RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } else if (N1.getOpcode() == ISD::FP_ROUND && + N1.getOperand(0).getOpcode() == ISD::FSQRT) { + if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0), + Flags)) { + RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1)); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } else if (N1.getOpcode() == ISD::FMUL) { + // Look through an FMUL. Even though this won't remove the FDIV directly, + // it's still worthwhile to get rid of the FSQRT if possible. + SDValue SqrtOp; + SDValue OtherOp; + if (N1.getOperand(0).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(0); + OtherOp = N1.getOperand(1); + } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) { + SqrtOp = N1.getOperand(1); + OtherOp = N1.getOperand(0); + } + if (SqrtOp.getNode()) { + // We found a FSQRT, so try to make this fold: + // x / (y * sqrt(z)) -> x * (rsqrt(z) / y) + if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) { + RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags); + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } + } + + // Fold into a reciprocal estimate and multiply instead of a real divide. 
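+    // i.e. x / y becomes x * (estimate of 1/y); the estimate comes from a
+    // target hook and may be refined with Newton-Raphson iterations.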
+ if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) { + AddToWorklist(RV.getNode()); + return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags); + } + } + + // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) + if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) { + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) { + // Both can be negated for free, check to see if at least one is cheaper + // negated. + if (LHSNeg == 2 || RHSNeg == 2) + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, + GetNegatedExpression(N0, DAG, LegalOperations), + GetNegatedExpression(N1, DAG, LegalOperations), + Flags); + } + } + + if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N)) + return CombineRepeatedDivisors; + + return SDValue(); +} + +SDValue DAGCombiner::visitFREM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + // fold (frem c1, c2) -> fmod(c1,c2) + if (N0CFP && N1CFP) + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, + &cast<BinaryWithFlagsSDNode>(N)->Flags); + + return SDValue(); +} + +SDValue DAGCombiner::visitFSQRT(SDNode *N) { + if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap()) + return SDValue(); + + // TODO: FSQRT nodes should have flags that propagate to the created nodes. + // For now, create a Flags object for use with all unsafe math transforms. + SDNodeFlags Flags; + Flags.setUnsafeAlgebra(true); + + // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5) + SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags); + if (!RV) + return SDValue(); + + EVT VT = RV.getValueType(); + SDLoc DL(N); + RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags); + AddToWorklist(RV.getNode()); + + // Unfortunately, RV is now NaN if the input was exactly 0. + // Select out this case and force the answer to 0. + SDValue Zero = DAG.getConstantFP(0.0, DL, VT); + EVT CCVT = getSetCCResultType(VT); + SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ); + AddToWorklist(ZeroCmp.getNode()); + AddToWorklist(RV.getNode()); + + return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, + ZeroCmp, Zero, RV); +} + +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. + // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. 
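+  // Concretely, the combine below is only skipped when the inner value is
+  // f128 and the extend/round actually changes the type, i.e. when stripping
+  // it would hand FCOPYSIGN an f128 sign operand.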
+ SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + +SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + EVT VT = N->getValueType(0); + + if (N0CFP && N1CFP) // Constant fold + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); + + if (N1CFP) { + const APFloat& V = N1CFP->getValueAPF(); + // copysign(x, c1) -> fabs(x) iff ispos(c1) + // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) + if (!V.isNegative()) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + } else { + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); + } + } + + // copysign(fabs(x), y) -> copysign(x, y) + // copysign(fneg(x), y) -> copysign(x, y) + // copysign(copysign(x,z), y) -> copysign(x, y) + if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || + N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, + N0.getOperand(0), N1); + + // copysign(x, abs(y)) -> abs(x) + if (N1.getOpcode() == ISD::FABS) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + + // copysign(x, copysign(y,z)) -> copysign(x, z) + if (N1.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, + N0, N1.getOperand(1)); + + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, + N0, N1.getOperand(0)); + + return SDValue(); +} + +SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + // fold (sint_to_fp c1) -> c1fp + if (isConstantIntBuildVectorOrConstantInt(N0) && + // ...but only if the target supports immediate floating-point values + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); + + // If the input is a legal type, and SINT_TO_FP is not legal on this target, + // but UINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to UINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); + } + + // The next optimizations are desirable only if SELECT_CC can be lowered. 
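+  // Note that for an i1 setcc the signed values 0 and -1 map to 0.0 and -1.0,
+  // which is why the select_cc below uses -1.0 rather than 1.0 as the true
+  // value.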
+ if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { + // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && + !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + + // fold (sint_to_fp (zext (setcc x, y, cc))) -> + // (select_cc x, y, 1.0, 0.0,, cc) + if (N0.getOpcode() == ISD::ZERO_EXTEND && + N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); + SDValue Ops[] = + { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), + N0.getOperand(0).getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = N0.getValueType(); + + // fold (uint_to_fp c1) -> c1fp + if (isConstantIntBuildVectorOrConstantInt(N0) && + // ...but only if the target supports immediate floating-point values + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); + + // If the input is a legal type, and UINT_TO_FP is not legal on this target, + // but SINT_TO_FP is legal on this target, try to convert. + if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && + TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { + // If the sign bit is known to be zero, we can change this to SINT_TO_FP. + if (DAG.SignBitIsZero(N0)) + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); + } + + // The next optimizations are desirable only if SELECT_CC can be lowered. + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { + // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) + + if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && + (!LegalOperations || + TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { + SDLoc DL(N); + SDValue Ops[] = + { N0.getOperand(0), N0.getOperand(1), + DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT), + N0.getOperand(2) }; + return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); + } + } + + return SDValue(); +} + +// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x +static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP) + return SDValue(); + + SDValue Src = N0.getOperand(0); + EVT SrcVT = Src.getValueType(); + bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP; + bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT; + + // We can safely assume the conversion won't overflow the output range, + // because (for example) (uint8_t)18293.f is undefined behavior. + + // Since we can assume the conversion won't overflow, our decision as to + // whether the input will fit in the float should depend on the minimum + // of the input range and output range. 
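+  // (For example, i16 -> f32 -> i16 round-trips exactly: f32 carries 24 bits
+  // of precision, which covers the 15 value bits of a signed i16, so the
+  // conversion pair folds away below.)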
+ + // This means this is also safe for a signed input and unsigned output, since + // a negative input would lead to undefined behavior. + unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned; + unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned; + unsigned ActualSize = std::min(InputSize, OutputSize); + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType()); + + // We can only fold away the float conversion if the input range can be + // represented exactly in the float range. + if (APFloat::semanticsPrecision(sem) >= ActualSize) { + if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) { + unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND + : ISD::ZERO_EXTEND; + return DAG.getNode(ExtOp, SDLoc(N), VT, Src); + } + if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src); + if (SrcVT == VT) + return Src; + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src); + } + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (fp_to_sint c1fp) -> c1 + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); + + return FoldIntToFPToInt(N, DAG); +} + +SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (fp_to_uint c1fp) -> c1 + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); + + return FoldIntToFPToInt(N, DAG); +} + +SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + EVT VT = N->getValueType(0); + + // fold (fp_round c1fp) -> c1fp + if (N0CFP) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); + + // fold (fp_round (fp_extend x)) -> x + if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) + return N0.getOperand(0); + + // fold (fp_round (fp_round x)) -> (fp_round x) + if (N0.getOpcode() == ISD::FP_ROUND) { + const bool NIsTrunc = N->getConstantOperandVal(1) == 1; + const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1; + // If the first fp_round isn't a value preserving truncation, it might + // introduce a tie in the second fp_round, that wouldn't occur in the + // single-step fp_round we want to fold to. + // In other words, double rounding isn't the same as rounding. + // Also, this is a value preserving truncation iff both fp_round's are. 
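+    // For example, rounding f64 -> f32 -> f16 can give a different result
+    // than rounding f64 -> f16 directly, so the fold is only done when the
+    // inner round is known to preserve the value (or under unsafe-math).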
+ if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) { + SDLoc DL(N); + return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0), + DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL)); + } + } + + // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) + if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, + N0.getOperand(0), N1); + AddToWorklist(Tmp.getNode()); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, + Tmp, N0.getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); + + // fold (fp_round_inreg c1fp) -> c1fp + if (N0CFP && isTypeLegal(EVT)) { + SDLoc DL(N); + SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT); + return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. + if (N->hasOneUse() && + N->use_begin()->getOpcode() == ISD::FP_ROUND) + return SDValue(); + + // fold (fp_extend c1fp) -> c1fp + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); + + // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op) + if (N0.getOpcode() == ISD::FP16_TO_FP && + TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0)); + + // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the + // value of X. + if (N0.getOpcode() == ISD::FP_ROUND + && N0.getNode()->getConstantOperandVal(1) == 1) { + SDValue In = N0.getOperand(0); + if (In.getValueType() == VT) return In; + if (VT.bitsLT(In.getValueType())) + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, + In, N0.getOperand(1)); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); + } + + // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) { + LoadSDNode *LN0 = cast<LoadSDNode>(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, + LN0->getChain(), + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); + CombineTo(N, ExtLoad); + CombineTo(N0.getNode(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), + N0.getValueType(), ExtLoad, + DAG.getIntPtrConstant(1, SDLoc(N0))), + ExtLoad.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + + return SDValue(); +} + +SDValue DAGCombiner::visitFCEIL(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (fceil c1) -> fceil(c1) + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFTRUNC(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ftrunc c1) -> ftrunc(c1) + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFFLOOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (ffloor c1) -> ffloor(c1) + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + + return SDValue(); +} + +// FIXME: FNEG and FABS have a lot in common; refactor. +SDValue DAGCombiner::visitFNEG(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Constant fold FNEG. + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + + if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), + &DAG.getTarget().Options)) + return GetNegatedExpression(N0, DAG, LegalOperations); + + // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading + // constant pool values. + if (!TLI.isFNegFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x80... per scalar element + // and splat it. + SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x80... + SignMask = APInt::getSignBit(IntVT.getSizeInBits()); + } + SDLoc DL0(N0); + Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int, + DAG.getConstant(SignMask, DL0, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); + } + } + + // (fneg (fmul c, x)) -> (fmul -c, x) + if (N0.getOpcode() == ISD::FMUL && + (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) { + ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); + if (CFP1) { + APFloat CVal = CFP1->getValueAPF(); + CVal.changeSign(); + if (Level >= AfterLegalizeDAG && + (TLI.isFPImmLegal(CVal, VT) || + TLI.isOperationLegal(ISD::ConstantFP, VT))) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FNEG, SDLoc(N), VT, + N0.getOperand(1)), + &cast<BinaryWithFlagsSDNode>(N0)->Flags); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFMINNUM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT); + } + + // Canonicalize to constant on RHS. 
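+  // e.g. (fminnum 2.0, x) -> (fminnum x, 2.0), so later folds only need to
+  // look for a constant on the right-hand side.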
+ if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFMAXNUM(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + EVT VT = N->getValueType(0); + const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0); + const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1); + + if (N0CFP && N1CFP) { + const APFloat &C0 = N0CFP->getValueAPF(); + const APFloat &C1 = N1CFP->getValueAPF(); + return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT); + } + + // Canonicalize to constant on RHS. + if (isConstantFPBuildVectorOrConstantFP(N0) && + !isConstantFPBuildVectorOrConstantFP(N1)) + return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFABS(SDNode *N) { + SDValue N0 = N->getOperand(0); + EVT VT = N->getValueType(0); + + // fold (fabs c1) -> fabs(c1) + if (isConstantFPBuildVectorOrConstantFP(N0)) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); + + // fold (fabs (fabs x)) -> (fabs x) + if (N0.getOpcode() == ISD::FABS) + return N->getOperand(0); + + // fold (fabs (fneg x)) -> (fabs x) + // fold (fabs (fcopysign x, y)) -> (fabs x) + if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); + + // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading + // constant pool values. + if (!TLI.isFAbsFree(VT) && + N0.getOpcode() == ISD::BITCAST && + N0.getNode()->hasOneUse()) { + SDValue Int = N0.getOperand(0); + EVT IntVT = Int.getValueType(); + if (IntVT.isInteger() && !IntVT.isVector()) { + APInt SignMask; + if (N0.getValueType().isVector()) { + // For a vector, get a mask such as 0x7f... per scalar element + // and splat it. + SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits()); + SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask); + } else { + // For a scalar, just generate 0x7f... + SignMask = ~APInt::getSignBit(IntVT.getSizeInBits()); + } + SDLoc DL(N0); + Int = DAG.getNode(ISD::AND, DL, IntVT, Int, + DAG.getConstant(SignMask, DL, IntVT)); + AddToWorklist(Int.getNode()); + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitBRCOND(SDNode *N) { + SDValue Chain = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + + // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal + // on the target. + if (N1.getOpcode() == ISD::SETCC && + TLI.isOperationLegalOrCustom(ISD::BR_CC, + N1.getOperand(0).getValueType())) { + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, + Chain, N1.getOperand(2), + N1.getOperand(0), N1.getOperand(1), N2); + } + + if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || + ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && + (N1.getOperand(0).hasOneUse() && + N1.getOperand(0).getOpcode() == ISD::SRL))) { + SDNode *Trunc = nullptr; + if (N1.getOpcode() == ISD::TRUNCATE) { + // Look pass the truncate. 
+ Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } + + // Match this pattern so that we can generate simpler code: + // + // %a = ... + // %b = and i32 %a, 2 + // %c = srl i32 %b, 1 + // brcond i32 %c ... + // + // into + // + // %a = ... + // %b = and i32 %a, 2 + // %c = setcc eq %b, 0 + // brcond %c ... + // + // This applies only when the AND constant value has one bit set and the + // SRL constant is equal to the log2 of the AND constant. The back-end is + // smart enough to convert the result into a TEST/JMP sequence. + SDValue Op0 = N1.getOperand(0); + SDValue Op1 = N1.getOperand(1); + + if (Op0.getOpcode() == ISD::AND && + Op1.getOpcode() == ISD::Constant) { + SDValue AndOp1 = Op0.getOperand(1); + + if (AndOp1.getOpcode() == ISD::Constant) { + const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); + + if (AndConst.isPowerOf2() && + cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { + SDLoc DL(N); + SDValue SetCC = + DAG.getSetCC(DL, + getSetCCResultType(Op0.getValueType()), + Op0, DAG.getConstant(0, DL, Op0.getValueType()), + ISD::SETNE); + + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL, + MVT::Other, Chain, SetCC, N2); + // Don't add the new BRCond into the worklist or else SimplifySelectCC + // will convert it back to (X & C1) >> C2. + CombineTo(N, NewBRCond, false); + // Truncate is dead. + if (Trunc) + deleteAndRecombine(Trunc); + // Replace the uses of SRL with SETCC + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + deleteAndRecombine(N1.getNode()); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + if (Trunc) + // Restore N1 if the above transformation doesn't match. + N1 = N->getOperand(1); + } + + // Transform br(xor(x, y)) -> br(x != y) + // Transform br(xor(xor(x,y), 1)) -> br (x == y) + if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { + SDNode *TheXor = N1.getNode(); + SDValue Op0 = TheXor->getOperand(0); + SDValue Op1 = TheXor->getOperand(1); + if (Op0.getOpcode() == Op1.getOpcode()) { + // Avoid missing important xor optimizations. + if (SDValue Tmp = visitXOR(TheXor)) { + if (Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); + deleteAndRecombine(TheXor); + return DAG.getNode(ISD::BRCOND, SDLoc(N), + MVT::Other, Chain, Tmp, N2); + } + + // visitXOR has changed XOR's operands or replaced the XOR completely, + // bail out. + return SDValue(N, 0); + } + } + + if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { + bool Equal = false; + if (isOneConstant(Op0) && Op0.hasOneUse() && + Op0.getOpcode() == ISD::XOR) { + TheXor = Op0.getNode(); + Equal = true; + } + + EVT SetCCVT = N1.getValueType(); + if (LegalTypes) + SetCCVT = getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), + SetCCVT, + Op0, Op1, + Equal ? ISD::SETEQ : ISD::SETNE); + // Replace the uses of XOR with SETCC + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, SetCC); + deleteAndRecombine(N1.getNode()); + return DAG.getNode(ISD::BRCOND, SDLoc(N), + MVT::Other, Chain, SetCC, N2); + } + } + + return SDValue(); +} + +// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
+// +SDValue DAGCombiner::visitBR_CC(SDNode *N) { + CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); + SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); + + // If N is a constant we could fold this into a fallthrough or unconditional + // branch. However that doesn't happen very often in normal code, because + // Instcombine/SimplifyCFG should have handled the available opportunities. + // If we did this folding here, it would be necessary to update the + // MachineBasicBlock CFG, which is awkward. + + // Use SimplifySetCC to simplify SETCC's. + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), + false); + if (Simp.getNode()) AddToWorklist(Simp.getNode()); + + // fold to a simpler setcc + if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, + N->getOperand(0), Simp.getOperand(2), + Simp.getOperand(0), Simp.getOperand(1), + N->getOperand(4)); + + return SDValue(); +} + +/// Return true if 'Use' is a load or a store that uses N as its base pointer +/// and that N may be folded in the load / store addressing mode. +static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, + SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT VT; + unsigned AS; + + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { + if (LD->isIndexed() || LD->getBasePtr().getNode() != N) + return false; + VT = LD->getMemoryVT(); + AS = LD->getAddressSpace(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { + if (ST->isIndexed() || ST->getBasePtr().getNode() != N) + return false; + VT = ST->getMemoryVT(); + AS = ST->getAddressSpace(); + } else + return false; + + TargetLowering::AddrMode AM; + if (N->getOpcode() == ISD::ADD) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else if (N->getOpcode() == ISD::SUB) { + ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (Offset) + // [reg +/- imm] + AM.BaseOffs = -Offset->getSExtValue(); + else + // [reg +/- reg] + AM.Scale = 1; + } else + return false; + + return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, + VT.getTypeForEVT(*DAG.getContext()), AS); +} + +/// Try turning a load/store into a pre-indexed load/store when the base +/// pointer is an add or subtract and it has other uses besides the load/store. +/// After the transformation, the new indexed load/store has effectively folded +/// the add/subtract in and all of its other uses are redirected to the +/// new load/store. +bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool isLoad = true; + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail + // out. There is no reason to make this a preinc/predec. 
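+  // With a single use the add/sub can usually be folded straight into this
+  // load/store's addressing mode; the pre-indexed form only pays off when the
+  // updated pointer is also needed elsewhere.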
+ if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || + Ptr.getNode()->hasOneUse()) + return false; + + // Ask the target to do addressing mode selection. + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) + return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa<ConstantSDNode>(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + + // Don't create a indexed load / store with zero offset. + if (isNullConstant(Offset)) + return false; + + // Try turning it into a pre-indexed load / store except when: + // 1) The new base ptr is a frame index. + // 2) If N is a store and the new base ptr is either the same as or is a + // predecessor of the value being stored. + // 3) Another use of old base ptr is a predecessor of N. If ptr is folded + // that would create a cycle. + // 4) All uses are load / store ops that use it as old base ptr. + + // Check #1. Preinc'ing a frame index would require copying the stack pointer + // (plus the implicit offset) to a register to preinc anyway. + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + return false; + + // Check #2. + if (!isLoad) { + SDValue Val = cast<StoreSDNode>(N)->getValue(); + if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode())) + return false; + } + + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector<SDNode *, 16> OtherUses; + if (isa<ConstantSDNode>(Offset)) + for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(), + UE = BasePtr.getNode()->use_end(); + UI != UE; ++UI) { + SDUse &Use = UI.getUse(); + // Skip the use that is Ptr and uses of other results from BasePtr's + // node (important for nodes that return multiple results). + if (Use.getUser() == Ptr.getNode() || Use != BasePtr) + continue; + + if (Use.getUser()->isPredecessorOf(N)) + continue; + + if (Use.getUser()->getOpcode() != ISD::ADD && + Use.getUser()->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1); + if (!isa<ConstantSDNode>(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use.getUser()); + } + + if (Swapped) + std::swap(BasePtr, Offset); + + // Now check for #3 and #4. + bool RealUse = false; + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + + for (SDNode *Use : Ptr.getNode()->uses()) { + if (Use == N) + continue; + if (N->hasPredecessorHelper(Use, Visited, Worklist)) + return false; + + // If Ptr may be folded in addressing mode of other use, then it's + // not profitable to do this transformation. 
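+    // RealUse stays false only if every other use could already fold Ptr into
+    // its own addressing mode, in which case pre-indexing saves nothing.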
+ if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI)) + RealUse = true; + } + + if (!RealUse) + return false; + + SDValue Result; + if (isLoad) + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + else + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + ++PreIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.4 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + // We need to replace ptr0 in the following expression: + // x0 * offset0 + y0 * ptr0 = t0 + // knowing that + // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) + // + // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the + // indexed load/store and the expresion that needs to be re-written. + // + // Therefore, we have: + // t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1 + + ConstantSDNode *CN = + cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx)); + int X0, X1, Y0, Y1; + APInt Offset0 = CN->getAPIntValue(); + APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue(); + + X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1; + Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1; + X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1; + Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1; + + unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD; + + APInt CNV = Offset0; + if (X0 < 0) CNV = -CNV; + if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1; + else CNV = CNV - Offset1; + + SDLoc DL(OtherUses[i]); + + // We can now generate the new expression. + SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0)); + SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); + + SDValue NewUse = DAG.getNode(Opcode, + DL, + OtherUses[i]->getValueType(0), NewOp1, NewOp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); + deleteAndRecombine(OtherUses[i]); + } + + // Replace the uses of Ptr with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); + deleteAndRecombine(Ptr.getNode()); + + return true; +} + +/// Try to combine a load/store with a add/sub of the base pointer node into a +/// post-indexed load/store. The transformation folded the add/subtract into the +/// new indexed load/store effectively and all of its uses are redirected to the +/// new load/store. 
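+/// For example, a load from p followed by a separate p = p + 4 can become a
+/// single post-incremented load that also produces the updated pointer.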
+bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + bool isLoad = true; + SDValue Ptr; + EVT VT; + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { + if (LD->isIndexed()) + return false; + VT = LD->getMemoryVT(); + if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) && + !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT)) + return false; + Ptr = LD->getBasePtr(); + } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { + if (ST->isIndexed()) + return false; + VT = ST->getMemoryVT(); + if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) && + !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT)) + return false; + Ptr = ST->getBasePtr(); + isLoad = false; + } else { + return false; + } + + if (Ptr.getNode()->hasOneUse()) + return false; + + for (SDNode *Op : Ptr.getNode()->uses()) { + if (Op == N || + (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)) + continue; + + SDValue BasePtr; + SDValue Offset; + ISD::MemIndexedMode AM = ISD::UNINDEXED; + if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) { + // Don't create a indexed load / store with zero offset. + if (isNullConstant(Offset)) + continue; + + // Try turning it into a post-indexed load / store except when + // 1) All uses are load / store ops that use it as base ptr (and + // it may be folded as addressing mmode). + // 2) Op must be independent of N, i.e. Op is neither a predecessor + // nor a successor of N. Otherwise, if Op is folded that would + // create a cycle. + + if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr)) + continue; + + // Check for #1. + bool TryNext = false; + for (SDNode *Use : BasePtr.getNode()->uses()) { + if (Use == Ptr.getNode()) + continue; + + // If all the uses are load / store addresses, then don't do the + // transformation. + if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){ + bool RealUse = false; + for (SDNode *UseUse : Use->uses()) { + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + RealUse = true; + } + + if (!RealUse) { + TryNext = true; + break; + } + } + } + + if (TryNext) + continue; + + // Check for #2 + if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { + SDValue Result = isLoad + ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM) + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), + BasePtr, Offset, AM); + ++PostIndexedNodes; + ++NodesCombined; + DEBUG(dbgs() << "\nReplacing.5 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Result.getNode()->dump(&DAG); + dbgs() << '\n'); + WorklistRemover DeadNodes(*this); + if (isLoad) { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0)); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2)); + } else { + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1)); + } + + // Finally, since the node is now dead, remove it from the graph. + deleteAndRecombine(N); + + // Replace the uses of Use with uses of the updated base value. + DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0), + Result.getValue(isLoad ? 1 : 0)); + deleteAndRecombine(Op); + return true; + } + } + } + + return false; +} + +/// \brief Return the base-pointer arithmetic from an indexed \p LD. +SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + assert(AM != ISD::UNINDEXED); + SDValue BP = LD->getOperand(1); + SDValue Inc = LD->getOperand(2); + + // Some backends use TargetConstants for load offsets, but don't expect + // TargetConstants in general ADD nodes. 
We can convert these constants into + // regular Constants (if the constant is not opaque). + assert((Inc.getOpcode() != ISD::TargetConstant || + !cast<ConstantSDNode>(Inc)->isOpaque()) && + "Cannot split out indexing using opaque target constants"); + if (Inc.getOpcode() == ISD::TargetConstant) { + ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc); + Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc), + ConstInc->getValueType(0)); + } + + unsigned Opc = + (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); + return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); +} + +SDValue DAGCombiner::visitLOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + + // If load is not volatile and there are no uses of the loaded value (and + // the updated indexed value in case of indexed loads), change uses of the + // chain value into uses of the chain input (i.e. delete the dead load). + if (!LD->isVolatile()) { + if (N->getValueType(1) == MVT::Other) { + // Unindexed loads. + if (!N->hasAnyUseOfValue(0)) { + // It's not safe to use the two value CombineTo variant here. e.g. + // v1, chain2 = load chain1, loc + // v2, chain3 = load chain2, loc + // v3 = add v2, c + // Now we replace use of chain2 with chain1. This makes the second load + // isomorphic to the one we are deleting, and thus makes this load live. + DEBUG(dbgs() << "\nReplacing.6 "; + N->dump(&DAG); + dbgs() << "\nWith chain: "; + Chain.getNode()->dump(&DAG); + dbgs() << "\n"); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + + if (N->use_empty()) + deleteAndRecombine(N); + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } else { + // Indexed loads. + assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); + + // If this load has an opaque TargetConstant offset, then we cannot split + // the indexing into an add/sub directly (that TargetConstant may not be + // valid for a different type of node, and we cannot convert an opaque + // target constant into a regular constant). + bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant && + cast<ConstantSDNode>(LD->getOperand(2))->isOpaque(); + + if (!N->hasAnyUseOfValue(0) && + ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) { + SDValue Undef = DAG.getUNDEF(N->getValueType(0)); + SDValue Index; + if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) { + Index = SplitIndexingFromLoad(LD); + // Try to fold the base pointer arithmetic into subsequent loads and + // stores. + AddUsersToWorklist(N); + } else + Index = DAG.getUNDEF(N->getValueType(1)); + DEBUG(dbgs() << "\nReplacing.7 "; + N->dump(&DAG); + dbgs() << "\nWith: "; + Undef.getNode()->dump(&DAG); + dbgs() << " and 2 other values\n"); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); + deleteAndRecombine(N); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } + } + + // If this load is directly stored, replace the load value with the stored + // value. + // TODO: Handle store large -> read small portion. 
+ // TODO: Handle TRUNCSTORE/LOADEXT + if (ISD::isNormalLoad(N) && !LD->isVolatile()) { + if (ISD::isNON_TRUNCStore(Chain.getNode())) { + StoreSDNode *PrevST = cast<StoreSDNode>(Chain); + if (PrevST->getBasePtr() == Ptr && + PrevST->getValue().getValueType() == N->getValueType(0)) + return CombineTo(N, Chain.getOperand(1), Chain); + } + } + + // Try to infer better alignment information than the load already has. + if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > LD->getMemOperand()->getBaseAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), + LD->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + LD->getMemoryVT(), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), Align, LD->getAAInfo()); + if (NewLoad.getNode() != N) + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } + } + } + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && LD->isUnindexed()) { + // Walk up chain skipping non-aliasing memory nodes. + SDValue BetterChain = FindBetterChain(N, Chain); + + // If there is a better chain. + if (Chain != BetterChain) { + SDValue ReplLoad; + + // Replace the chain to void dependency. + if (LD->getExtensionType() == ISD::NON_EXTLOAD) { + ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), + BetterChain, Ptr, LD->getMemOperand()); + } else { + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), + LD->getValueType(0), + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); + } + + // Create token factor to keep old chain connected. + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), + MVT::Other, Chain, ReplLoad.getValue(1)); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Replace uses with load result and token factor. Don't add users + // to work list. + return CombineTo(N, ReplLoad.getValue(0), Token, false); + } + } + + // Try transforming N to an indexed load. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + + return SDValue(); +} + +namespace { +/// \brief Helper structure used to slice a load in smaller loads. +/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewriten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various cost. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. 
+ Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provide a few gains like removing a shift or a + /// truncate. This method allows to grow the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(), + LS.Inst->getValueType(0))) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. + if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represent the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr, + unsigned Shift = 0, SelectionDAG *DAG = nullptr) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. 
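+ // E.g., an i8 slice of an i32 load with Shift == 16 uses bits 0x00ff0000.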
+ assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. + unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != nullptr. + uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = DAG->getDataLayout().isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. 
+ assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianess. + int64_t Offset = static_cast<int64_t>(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + SDLoc DL(Origin); + BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr, + DAG->getConstant(Offset, DL, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. + SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class. + if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. 
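+ // If the slice needed a zero extend, the bitcast would no longer feed
+ // directly from the load and could not be folded into the cross-bank copy.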
+ if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. +static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// paring capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there is in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there is less than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), + [](const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + }); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. Second) potentially candidate + // to be placed in a paired load. + const LoadedSlice *First = nullptr; + const LoadedSlice *Second = nullptr; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. + if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // move to the next pair, this type is hopeless. + Second = nullptr; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. 
+ if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = nullptr; + } +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there is exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model will be mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. + OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired load, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector<LoadedSlice, 4> LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa<ConstantSDNode>(User->getOperand(1))) { + Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a Truncate, iff we encountered, trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. 
+ // Moreover, if we shifted with a non-8-bits multiple, the slice + // will be across several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. + SmallVector<SDValue, 8> ArgChains; + for (SmallVectorImpl<LoadedSlice>::const_iterator + LSIt = LoadedSlices.begin(), + LSItEnd = LoadedSlices.end(); + LSIt != LSItEnd; ++LSIt) { + SDValue SliceInst = LSIt->loadSlice(); + CombineTo(LSIt->Inst, SliceInst, true); + if (SliceInst.getNode()->getOpcode() != ISD::LOAD) + SliceInst = SliceInst.getOperand(0); + assert(SliceInst->getOpcode() == ISD::LOAD && + "It takes more than a zext to get to the loaded slice!!"); + ArgChains.push_back(SliceInst.getValue(1)); + } + + SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, + ArgChains); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); + return true; +} + +/// Check to see if V is (and load (ptr), imm), where the load is having +/// specific bytes cleared out. If so, return the byte size being masked out +/// and the shift amount. +static std::pair<unsigned, unsigned> +CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { + std::pair<unsigned, unsigned> Result(0, 0); + + // Check for the structure we're looking for. + if (V->getOpcode() != ISD::AND || + !isa<ConstantSDNode>(V->getOperand(1)) || + !ISD::isNormalLoad(V->getOperand(0).getNode())) + return Result; + + // Check the chain and pointer. + LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); + if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. + + // The store should be chained directly to the load or be an operand of a + // tokenfactor. + if (LD == Chain.getNode()) + ; // ok. + else if (Chain->getOpcode() != ISD::TokenFactor) + return Result; // Fail. + else { + bool isOk = false; + for (const SDValue &ChainOp : Chain->op_values()) + if (ChainOp.getNode() == LD) { + isOk = true; + break; + } + if (!isOk) return Result; + } + + // This only handles simple types. + if (V.getValueType() != MVT::i16 && + V.getValueType() != MVT::i32 && + V.getValueType() != MVT::i64) + return Result; + + // Check the constant mask. Invert it so that the bits being masked out are + // 0 and the bits being kept are 1. Use getSExtValue so that leading bits + // follow the sign bit for uniformity. + uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); + unsigned NotMaskLZ = countLeadingZeros(NotMask); + if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. + unsigned NotMaskTZ = countTrailingZeros(NotMask); + if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. + if (NotMaskLZ == 64) return Result; // All zero mask. 
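+ // Example: an i32 value masked with 0xFFFF00FF gives NotMask == 0xFF00,
+ // i.e. only byte 1 is cleared; the checks below accept it and produce
+ // {MaskedBytes = 1, ByteShift = 1}.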
+ + // See if we have a continuous run of bits. If so, we have 0*1+0* + if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64) + return Result; + + // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. + if (V.getValueType() != MVT::i64 && NotMaskLZ) + NotMaskLZ -= 64-V.getValueSizeInBits(); + + unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; + switch (MaskedBytes) { + case 1: + case 2: + case 4: break; + default: return Result; // All one mask, or 5-byte mask. + } + + // Verify that the first bit starts at a multiple of mask so that the access + // is aligned the same as the access width. + if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; + + Result.first = MaskedBytes; + Result.second = NotMaskTZ/8; + return Result; +} + + +/// Check to see if IVal is something that provides a value as specified by +/// MaskInfo. If so, replace the specified store with a narrower store of +/// truncated IVal. +static SDNode * +ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, + SDValue IVal, StoreSDNode *St, + DAGCombiner *DC) { + unsigned NumBytes = MaskInfo.first; + unsigned ByteShift = MaskInfo.second; + SelectionDAG &DAG = DC->getDAG(); + + // Check to see if IVal is all zeros in the part being masked in by the 'or' + // that uses this. If not, this is not a replacement. + APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), + ByteShift*8, (ByteShift+NumBytes)*8); + if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; + + // Check that it is legal on the target to do this. It is legal if the new + // VT we're shrinking to (i8/i16/i32) is legal or we're still before type + // legalization. + MVT VT = MVT::getIntegerVT(NumBytes*8); + if (!DC->isTypeLegal(VT)) + return nullptr; + + // Okay, we can do this! Replace the 'St' store with a store of IVal that is + // shifted by ByteShift and truncated down to NumBytes. + if (ByteShift) { + SDLoc DL(IVal); + IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal, + DAG.getConstant(ByteShift*8, DL, + DC->getShiftAmountTy(IVal.getValueType()))); + } + + // Figure out the offset for the store and the alignment of the access. + unsigned StOffset; + unsigned NewAlign = St->getAlignment(); + + if (DAG.getDataLayout().isLittleEndian()) + StOffset = ByteShift; + else + StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; + + SDValue Ptr = St->getBasePtr(); + if (StOffset) { + SDLoc DL(IVal); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), + Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType())); + NewAlign = MinAlign(NewAlign, StOffset); + } + + // Truncate down to the new size. + IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); + + ++OpsNarrowed; + return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, + St->getPointerInfo().getWithOffset(StOffset), + false, false, NewAlign).getNode(); +} + + +/// Look for sequence of load / op / store where op is one of 'or', 'xor', and +/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try +/// narrowing the load and store if it would end up being a win for performance +/// or code size. 
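+/// For example, (store (or (load p), 0xFF000000), p) on a little-endian
+/// target only touches byte 3 of the value, so it can be narrowed to an i8
+/// load / or / store at p+3 when the target considers that profitable.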
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + if (ST->isVolatile()) + return SDValue(); + + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + EVT VT = Value.getValueType(); + + if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) + return SDValue(); + + unsigned Opc = Value.getOpcode(); + + // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst + // is a byte mask indicating a consecutive number of bytes, check to see if + // Y is known to provide just those bytes. If so, we try to replace the + // load + replace + store sequence with a single (narrower) store, which makes + // the load dead. + if (Opc == ISD::OR) { + std::pair<unsigned, unsigned> MaskedLoad; + MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(1), ST,this)) + return SDValue(NewST, 0); + + // Or is commutative, so try swapping X and Y. + MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); + if (MaskedLoad.first) + if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, + Value.getOperand(0), ST,this)) + return SDValue(NewST, 0); + } + + if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || + Value.getOperand(1).getOpcode() != ISD::Constant) + return SDValue(); + + SDValue N0 = Value.getOperand(0); + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && + Chain == SDValue(N0.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(N0); + if (LD->getBasePtr() != Ptr || + LD->getPointerInfo().getAddrSpace() != + ST->getPointerInfo().getAddrSpace()) + return SDValue(); + + // Find the type to narrow it the load / op / store to. + SDValue N1 = Value.getOperand(1); + unsigned BitWidth = N1.getValueSizeInBits(); + APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); + if (Opc == ISD::AND) + Imm ^= APInt::getAllOnesValue(BitWidth); + if (Imm == 0 || Imm.isAllOnesValue()) + return SDValue(); + unsigned ShAmt = Imm.countTrailingZeros(); + unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; + unsigned NewBW = NextPowerOf2(MSB - ShAmt); + EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + // The narrowing should be profitable, the load/store operation should be + // legal (or custom) and the store size should be equal to the NewVT width. + while (NewBW < BitWidth && + (NewVT.getStoreSizeInBits() != NewBW || + !TLI.isOperationLegalOrCustom(Opc, NewVT) || + !TLI.isNarrowingProfitable(VT, NewVT))) { + NewBW = NextPowerOf2(NewBW); + NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); + } + if (NewBW >= BitWidth) + return SDValue(); + + // If the lsb changed does not start at the type bitwidth boundary, + // start at the previous one. + if (ShAmt % NewBW) + ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, + std::min(BitWidth, ShAmt + NewBW)); + if ((Imm & Mask) == Imm) { + APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); + if (Opc == ISD::AND) + NewImm ^= APInt::getAllOnesValue(NewBW); + uint64_t PtrOff = ShAmt / 8; + // For big endian targets, we need to adjust the offset to the pointer to + // load the correct bytes. 
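+ // (e.g. narrowing an i32 access to its top byte gives PtrOff == 3 on a
+ // little-endian target but PtrOff == 0 on a big-endian one).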
+ if (DAG.getDataLayout().isBigEndian()) + PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; + + unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); + Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); + if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy)) + return SDValue(); + + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD), + Ptr.getValueType(), Ptr, + DAG.getConstant(PtrOff, SDLoc(LD), + Ptr.getValueType())); + SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0), + LD->getChain(), NewPtr, + LD->getPointerInfo().getWithOffset(PtrOff), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), NewAlign, + LD->getAAInfo()); + SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD, + DAG.getConstant(NewImm, SDLoc(Value), + NewVT)); + SDValue NewST = DAG.getStore(Chain, SDLoc(N), + NewVal, NewPtr, + ST->getPointerInfo().getWithOffset(PtrOff), + false, false, NewAlign); + + AddToWorklist(NewPtr.getNode()); + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewVal.getNode()); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); + ++OpsNarrowed; + return NewST; + } + } + + return SDValue(); +} + +/// For a given floating point load / store pair, if the load value isn't used +/// by any other operations, then consider transforming the pair to integer +/// load / store operations if the target deems the transformation profitable. +SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && + Value.hasOneUse() && + Chain == SDValue(Value.getNode(), 1)) { + LoadSDNode *LD = cast<LoadSDNode>(Value); + EVT VT = LD->getMemoryVT(); + if (!VT.isFloatingPoint() || + VT != ST->getMemoryVT() || + LD->isNonTemporal() || + ST->isNonTemporal() || + LD->getPointerInfo().getAddrSpace() != 0 || + ST->getPointerInfo().getAddrSpace() != 0) + return SDValue(); + + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || + !TLI.isOperationLegal(ISD::STORE, IntVT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || + !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) + return SDValue(); + + unsigned LDAlign = LD->getAlignment(); + unsigned STAlign = ST->getAlignment(); + Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); + unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy); + if (LDAlign < ABIAlign || STAlign < ABIAlign) + return SDValue(); + + SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), + LD->getChain(), LD->getBasePtr(), + LD->getPointerInfo(), + false, false, false, LDAlign); + + SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), + NewLD, ST->getBasePtr(), + ST->getPointerInfo(), + false, false, STAlign); + + AddToWorklist(NewLD.getNode()); + AddToWorklist(NewST.getNode()); + WorklistRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); + ++LdStFP2Int; + return NewST; + } + + return SDValue(); +} + +namespace { +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. 
+/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; + } + + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD + // instruction, then it could be just the BASE or everything else we don't + // know how to handle. Just use Ptr as BASE and give up. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // We know that we have at least an ADD instruction. Try to pattern match + // the simple case of BASE + OFFSET. + if (isa<ConstantSDNode>(Ptr->getOperand(1))) { + int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Inside a loop the current BASE pointer is calculated using an ADD and a + // MUL instruction. In this case Ptr is the actual BASE pointer. + // (i64 add (i64 %array_ptr) + // (i64 mul (i64 %induction_var) + // (i64 %element_size))) + if (Ptr->getOperand(1)->getOpcode() == ISD::MUL) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa<ConstantSDNode>(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; +} // namespace + +// This is a helper function for visitMUL to check the profitability +// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). +// MulNode is the original multiply, AddNode is (add x, c1), +// and ConstNode is c2. +// +// If the (add x, c1) has multiple uses, we could increase +// the number of adds if we make this transformation. +// It would only be worth doing this if we can remove a +// multiply in the process. Check for that here. +// To illustrate: +// (A + c1) * c3 +// (A + c2) * c3 +// We're checking for cases where we have common "c3 * A" expressions. 
+bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode, + SDValue &AddNode, + SDValue &ConstNode) { + APInt Val; + + // If the add only has one use, this would be OK to do. + if (AddNode.getNode()->hasOneUse()) + return true; + + // Walk all the users of the constant with which we're multiplying. + for (SDNode *Use : ConstNode->uses()) { + + if (Use == MulNode) // This use is the one we're on right now. Skip it. + continue; + + if (Use->getOpcode() == ISD::MUL) { // We have another multiply use. + SDNode *OtherOp; + SDNode *MulVar = AddNode.getOperand(0).getNode(); + + // OtherOp is what we're multiplying against the constant. + if (Use->getOperand(0) == ConstNode) + OtherOp = Use->getOperand(1).getNode(); + else + OtherOp = Use->getOperand(0).getNode(); + + // Check to see if multiply is with the same operand of our "add". + // + // ConstNode = CONST + // Use = ConstNode * A <-- visiting Use. OtherOp is A. + // ... + // AddNode = (A + c1) <-- MulVar is A. + // = AddNode * ConstNode <-- current visiting instruction. + // + // If we make this transformation, we will have a common + // multiply (ConstNode * A) that we can save. + if (OtherOp == MulVar) + return true; + + // Now check to see if a future expansion will give us a common + // multiply. + // + // ConstNode = CONST + // AddNode = (A + c1) + // ... = AddNode * ConstNode <-- current visiting instruction. + // ... + // OtherOp = (A + c2) + // Use = OtherOp * ConstNode <-- visiting Use. + // + // If we make this transformation, we will have a common + // multiply (CONST * A) after we also do the same transformation + // to the "t2" instruction. + if (OtherOp->getOpcode() == ISD::ADD && + isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) && + OtherOp->getOperand(0).getNode() == MulVar) + return true; + } + } + + // Didn't find a case where this would be profitable. + return false; +} + +SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG, + SDLoc SL, + ArrayRef<MemOpLink> Stores, + SmallVectorImpl<SDValue> &Chains, + EVT Ty) const { + SmallVector<SDValue, 8> BuildVector; + + for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) { + StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode); + Chains.push_back(St->getChain()); + BuildVector.push_back(St->getValue()); + } + + return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector); +} + +bool DAGCombiner::MergeStoresOfConstantsOrVecElts( + SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, + unsigned NumStores, bool IsConstantSrc, bool UseVector) { + // Make sure we have something to merge. + if (NumStores < 2) + return false; + + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned LatestNodeUsed = 0; + + for (unsigned i=0; i < NumStores; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; + } + + SmallVector<SDValue, 8> Chains; + + // The latest Node in the DAG. + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + SDLoc DL(StoreNodes[0].MemNode); + + SDValue StoredVal; + if (UseVector) { + bool IsVec = MemVT.isVector(); + unsigned Elts = NumStores; + if (IsVec) { + // When merging vector stores, get the total number of elements. 
+ Elts *= MemVT.getVectorNumElements(); + } + // Get the type for the merged vector store. + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); + + if (IsConstantSrc) { + StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty); + } else { + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < NumStores; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue Val = St->getValue(); + // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type. + if (Val.getValueType() != MemVT) + return false; + Ops.push_back(Val); + Chains.push_back(St->getChain()); + } + + // Build the extracted vector elements back into a vector. + StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, + DL, Ty, Ops); } + } else { + // We should always use a vector store when merging extracted vector + // elements, so this path implies a store of constants. + assert(IsConstantSrc && "Merged vector elements should use vector store"); + + unsigned SizeInBits = NumStores * ElementSizeBytes * 8; + APInt StoreInt(SizeInBits, 0); + + // Construct a single integer constant which is made of the smaller + // constant inputs. + bool IsLE = DAG.getDataLayout().isLittleEndian(); + for (unsigned i = 0; i < NumStores; ++i) { + unsigned Idx = IsLE ? (NumStores - 1 - i) : i; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); + Chains.push_back(St->getChain()); + + SDValue Val = St->getValue(); + StoreInt <<= ElementSizeBytes * 8; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { + StoreInt |= C->getAPIntValue().zext(SizeInBits); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { + StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits); + } else { + llvm_unreachable("Invalid constant element type"); + } + } + + // Create the new Load and Store operations. + EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits); + StoredVal = DAG.getConstant(StoreInt, DL, StoreTy); + } + + assert(!Chains.empty()); + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal, + FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), + false, false, + FirstInChain->getAlignment()); + + // Replace the last store with the new store + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumStores; ++i) { + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + // ReplaceAllUsesWith will replace all uses that existed when it was + // called, but graph optimizations may cause new ones to appear. For + // example, the case in pr14333 looks like + // + // St's chain -> St -> another store -> X + // + // And the only difference from St to the other store is the chain. + // When we change it's chain to be St's chain they become identical, + // get CSEed and the net result is that X is now a use of St. + // Since we know that St is redundant, just iterate. + while (!St->use_empty()) + DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} + +void DAGCombiner::getStoreMergeAndAliasCandidates( + StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, + SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. 
+ BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return; + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + EVT MemVT = St->getMemoryVT(); + unsigned Seq = 0; + StoreSDNode *Index = St; + + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); + + if (UseAA) { + // Look at other users of the same chain. Stores on the same chain do not + // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized + // to be on the same chain, so don't bother looking at adjacent chains. + + SDValue Chain = St->getChain(); + for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) { + if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) { + if (I.getOperandNo() != 0) + continue; + + if (OtherST->isVolatile() || OtherST->isIndexed()) + continue; + + if (OtherST->getMemoryVT() != MemVT) + continue; + + BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr()); + + if (Ptr.equalBaseIndex(BasePtr)) + StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++)); + } + } + + return; + } + + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // The memory operands must not be volatile. + if (Index->isVolatile() || Index->isIndexed()) + break; + + // No truncation. + if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) + if (St->isTruncatingStore()) + break; + + // The stored memory type must be the same. + if (Index->getMemoryVT() != MemVT) + break; + + // We do not allow under-aligned stores in order to prevent + // overriding stores. NOTE: this is a bad hack. Alignment SHOULD + // be irrelevant here; what MATTERS is that we not move memory + // operations that potentially overlap past each-other. + if (Index->getAlignment() < MemVT.getStoreSize()) + break; + + // We found a potential memory operand to merge. + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (1) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + if (Ldn->isVolatile()) { + Index = nullptr; + break; + } + + // Save the load node for later. Continue the scan. 
+ AliasLoadNodes.push_back(Ldn); + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } +} + +bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { + if (OptLevel == CodeGenOpt::None) + return false; + + EVT MemVT = St->getMemoryVT(); + int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8; + bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute( + Attribute::NoImplicitFloat); + + // This function cannot currently deal with non-byte-sized memory sizes. + if (ElementSizeBytes * 8 != MemVT.getSizeInBits()) + return false; + + if (!MemVT.isSimple()) + return false; + + // Perform an early exit check. Do not bother looking at stored values that + // are not constants, loads, or extracted vector elements. + SDValue StoredVal = St->getValue(); + bool IsLoadSrc = isa<LoadSDNode>(StoredVal); + bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) || + isa<ConstantFPSDNode>(StoredVal); + bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT || + StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR); + + if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc) + return false; + + // Don't merge vectors into wider vectors if the source data comes from loads. + // TODO: This restriction can be lifted by using logic similar to the + // ExtractVecSrc case. + if (MemVT.isVector() && IsLoadSrc) + return false; + + // Only look at ends of store sequences. + SDValue Chain = SDValue(St, 0); + if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) + return false; + + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector<LSBaseSDNode*, 8> AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. + SmallVector<MemOpLink, 8> StoreNodes; + + getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes); + + // Check if there is anything to merge. + if (StoreNodes.size() < 2) + return false; + + // Sort the memory operands according to their distance from the + // base pointer. As a secondary criteria: make sure stores coming + // later in the code come first in the list. This is important for + // the non-UseAA case, because we're merging stores into the FINAL + // store along a chain which potentially contains aliasing stores. + // Thus, if there are multiple stores to the same address, the last + // one can be considered for merging but not the others. + std::sort(StoreNodes.begin(), StoreNodes.end(), + [](MemOpLink LHS, MemOpLink RHS) { + return LHS.OffsetFromBase < RHS.OffsetFromBase || + (LHS.OffsetFromBase == RHS.OffsetFromBase && + LHS.SequenceNum < RHS.SequenceNum); + }); + + // Scan the memory operations on the chain and find the first non-consecutive + // store memory address. + unsigned LastConsecutiveStore = 0; + int64_t StartAddress = StoreNodes[0].OffsetFromBase; + for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) { + + // Check that the addresses are consecutive starting from the second + // element in the list of stores. + if (i > 0) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + } + + // Check if this store interferes with any of the loads that we found. + // If we find a load that alias with this store. Stop the sequence. 
+ if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(), + [&](LSBaseSDNode* Ldn) { + return isAlias(Ldn, StoreNodes[i].MemNode); + })) + break; + + // Mark this node as useful. + LastConsecutiveStore = i; + } + + // The node with the lowest store address. + LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; + unsigned FirstStoreAS = FirstInChain->getAddressSpace(); + unsigned FirstStoreAlign = FirstInChain->getAlignment(); + LLVMContext &Context = *DAG.getContext(); + const DataLayout &DL = DAG.getDataLayout(); + + // Store the constants into memory as one consecutive store. + if (IsConstantSrc) { + unsigned LastLegalType = 0; + unsigned LastLegalVectorType = 0; + bool NonZero = false; + for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + SDValue StoredVal = St->getValue(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { + NonZero |= !C->isNullValue(); + } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { + NonZero |= !C->getConstantFPValue()->isNullValue(); + } else { + // Non-constant. + break; + } + + // Find a legal type for the constant store. + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits); + bool IsFast; + if (TLI.isTypeLegal(StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) { + LastLegalType = i+1; + // Or check whether a truncstore is legal. + } else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(Context, StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFast) && + IsFast) { + LastLegalType = i + 1; + } + } + + // We only use vectors if the constant is known to be zero or the target + // allows it and the function is not marked with the noimplicitfloat + // attribute. + if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1, + FirstStoreAS)) && + !NoVectors) { + // Find a legal type for the vector store. + EVT Ty = EVT::getVectorVT(Context, MemVT, i+1); + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + LastLegalVectorType = i + 1; + } + } + + // Check if we found a legal integer type to store. + if (LastLegalType == 0 && LastLegalVectorType == 0) + return false; + + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; + unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; + + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, + true, UseVector); + } + + // When extracting multiple vector elements, try to store them + // in one vector store rather than a sequence of scalar stores. + if (IsExtractVecSrc) { + unsigned NumStoresToMerge = 0; + bool IsVec = MemVT.isVector(); + for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + unsigned StoreValOpcode = St->getValue().getOpcode(); + // This restriction could be loosened. + // Bail out if any stored values are not elements extracted from a vector. + // It should be possible to handle mixed sources, but load sources need + // more careful handling (see the block of code below that handles + // consecutive loads). 
+ if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT && + StoreValOpcode != ISD::EXTRACT_SUBVECTOR) + return false; + + // Find a legal type for the vector store. + unsigned Elts = i + 1; + if (IsVec) { + // When merging vector stores, get the total number of elements. + Elts *= MemVT.getVectorNumElements(); + } + EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts); + bool IsFast; + if (TLI.isTypeLegal(Ty) && + TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS, + FirstStoreAlign, &IsFast) && IsFast) + NumStoresToMerge = i + 1; + } + + return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge, + false, true); + } + + // Below we handle the case of multiple consecutive stores that + // come from multiple consecutive loads. We merge them into a single + // wide load and a single wide store. + + // Look for load nodes which are used by the stored values. + SmallVector<MemOpLink, 8> LoadNodes; + + // Find acceptable loads. Loads need to have the same chain (token factor), + // must not be zext, volatile, indexed, and they must be consecutive. + BaseIndexOffset LdBasePtr; + for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); + if (!Ld) break; + + // Loads must only have one use. + if (!Ld->hasNUsesOfValue(1, 0)) + break; + + // The memory operands must not be volatile. + if (Ld->isVolatile() || Ld->isIndexed()) + break; + + // We do not accept ext loads. + if (Ld->getExtensionType() != ISD::NON_EXTLOAD) + break; + + // The stored memory type must be the same. + if (Ld->getMemoryVT() != MemVT) + break; + + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); + // If this is not the first ptr that we check. + if (LdBasePtr.Base.getNode()) { + // The base ptr must be the same. + if (!LdPtr.equalBaseIndex(LdBasePtr)) + break; + } else { + // Check that all other base pointers are the same as this one. + LdBasePtr = LdPtr; + } + + // We found a potential memory operand to merge. + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); + } + + if (LoadNodes.size() < 2) + return false; + + // If we have load/store pair instructions and we only have two values, + // don't bother. + unsigned RequiredAlignment; + if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) && + St->getAlignment() >= RequiredAlignment) + return false; + + LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); + unsigned FirstLoadAS = FirstLoad->getAddressSpace(); + unsigned FirstLoadAlign = FirstLoad->getAlignment(); + + // Scan the memory operations on the chain and find the first non-consecutive + // load memory address. These variables hold the index in the store node + // array. + unsigned LastConsecutiveLoad = 0; + // This variable refers to the size and not index in the array. + unsigned LastLegalVectorType = 0; + unsigned LastLegalIntegerType = 0; + StartAddress = LoadNodes[0].OffsetFromBase; + SDValue FirstChain = FirstLoad->getChain(); + for (unsigned i = 1; i < LoadNodes.size(); ++i) { + // All loads must share the same chain. + if (LoadNodes[i].MemNode->getChain() != FirstChain) + break; + + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + LastConsecutiveLoad = i; + // Find a legal type for the vector store. 
+ EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1); + bool IsFastSt, IsFastLd; + if (TLI.isTypeLegal(StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) { + LastLegalVectorType = i + 1; + } + + // Find a legal type for the integer store. + unsigned SizeInBits = (i+1) * ElementSizeBytes * 8; + StoreTy = EVT::getIntegerVT(Context, SizeInBits); + if (TLI.isTypeLegal(StoreTy) && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS, + FirstStoreAlign, &IsFastSt) && IsFastSt && + TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS, + FirstLoadAlign, &IsFastLd) && IsFastLd) + LastLegalIntegerType = i + 1; + // Or check whether a truncstore and extload is legal. + else if (TLI.getTypeAction(Context, StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(Context, StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstStoreAS, FirstStoreAlign, &IsFastSt) && + IsFastSt && + TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy, + FirstLoadAS, FirstLoadAlign, &IsFastLd) && + IsFastLd) + LastLegalIntegerType = i+1; + } + } + + // Only use vector types if the vector type is larger than the integer type. + // If they are the same, use integers. + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; + unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); + + // We add +1 here because the LastXXX variables refer to location while + // the NumElem refers to array/index size. + unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; + NumElem = std::min(LastLegalType, NumElem); + + if (NumElem < 2) + return false; + + // Collect the chains from all merged stores. + SmallVector<SDValue, 8> MergeStoreChains; + MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain()); + + // The latest Node in the DAG. + unsigned LatestNodeUsed = 0; + for (unsigned i=1; i<NumElem; ++i) { + // Find a chain for the new wide-store operand. Notice that some + // of the store nodes that we found may not be selected for inclusion + // in the wide store. The chain we use needs to be the chain of the + // latest store node which is *used* and replaced by the wide store. + if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum) + LatestNodeUsed = i; + + MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain()); + } + + LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode; + + // Find if it is better to use vectors or integers to load and store + // to memory. + EVT JointMemOpVT; + if (UseVectorTy) { + JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem); + } else { + unsigned SizeInBits = NumElem * ElementSizeBytes * 8; + JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits); + } + + SDLoc LoadDL(LoadNodes[0].MemNode); + SDLoc StoreDL(StoreNodes[0].MemNode); + + // The merged loads are required to have the same incoming chain, so + // using the first's chain is acceptable. 
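+ // (the consecutive-load scan above stops at the first load on a different
+ // chain, so every load merged here shares FirstLoad's chain).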
+ SDValue NewLoad = DAG.getLoad( + JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(), + FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign); + + SDValue NewStoreChain = + DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains); + + SDValue NewStore = DAG.getStore( + NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(), + FirstInChain->getPointerInfo(), false, false, FirstStoreAlign); + + // Transfer chain users from old loads to the new load. + for (unsigned i = 0; i < NumElem; ++i) { + LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), + SDValue(NewLoad.getNode(), 1)); + } + + // Replace the last store with the new store. + CombineTo(LatestOp, NewStore); + // Erase all other stores. + for (unsigned i = 0; i < NumElem ; ++i) { + // Remove all Store nodes. + if (StoreNodes[i].MemNode == LatestOp) + continue; + StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); + DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); + deleteAndRecombine(St); + } + + return true; +} + +SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) { + SDLoc SL(ST); + SDValue ReplStore; + + // Replace the chain to avoid dependency. + if (ST->isTruncatingStore()) { + ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(), + ST->getBasePtr(), ST->getMemoryVT(), + ST->getMemOperand()); + } else { + ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(), + ST->getMemOperand()); + } + + // Create token to keep both nodes around. + SDValue Token = DAG.getNode(ISD::TokenFactor, SL, + MVT::Other, ST->getChain(), ReplStore); + + // Make sure the new and old chains are cleaned up. + AddToWorklist(Token.getNode()); + + // Don't add users to work list. + return CombineTo(ST, Token, false); +} + +SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) { + SDValue Value = ST->getValue(); + if (Value.getOpcode() == ISD::TargetConstantFP) + return SDValue(); + + SDLoc DL(ST); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value); + + // NOTE: If the original store is volatile, this transform must not increase + // the number of stores. For example, on x86-32 an f64 can be stored in one + // processor operation but an i64 (which is not legal) requires two. So the + // transform should not be done in this case. + + SDValue Tmp; + switch (CFP->getSimpleValueType(0).SimpleTy) { + default: + llvm_unreachable("Unknown FP type"); + case MVT::f16: // We don't do this for these yet. + case MVT::f80: + case MVT::f128: + case MVT::ppcf128: + return SDValue(); + case MVT::f32: + if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + ; + Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). + bitcastToAPInt().getZExtValue(), SDLoc(CFP), + MVT::i32); + return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand()); + } + + return SDValue(); + case MVT::f64: + if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && + !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { + ; + Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + getZExtValue(), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, DL, Tmp, + Ptr, ST->getMemOperand()); + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + // Many FP stores are not made apparent until after legalize, e.g. 
for + // argument passing. Since this is so common, custom legalize the + // 64-bit integer store into two 32-bit stores. + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32); + SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + SDValue St0 = DAG.getStore(Chain, DL, Lo, + Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, + ST->getAlignment(), AAInfo); + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(4, DL, Ptr.getValueType())); + Alignment = MinAlign(Alignment, 4U); + SDValue St1 = DAG.getStore(Chain, DL, Hi, + Ptr, ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, + Alignment, AAInfo); + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + St0, St1); + } + + return SDValue(); + } +} + +SDValue DAGCombiner::visitSTORE(SDNode *N) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Chain = ST->getChain(); + SDValue Value = ST->getValue(); + SDValue Ptr = ST->getBasePtr(); + + // If this is a store of a bit convert, store the input value if the + // resultant store does not need a higher alignment than the original. + if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && + ST->isUnindexed()) { + unsigned OrigAlign = ST->getAlignment(); + EVT SVT = Value.getOperand(0).getValueType(); + unsigned Align = DAG.getDataLayout().getABITypeAlignment( + SVT.getTypeForEVT(*DAG.getContext())); + if (Align <= OrigAlign && + ((!LegalOperations && !ST->isVolatile()) || + TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getPointerInfo(), ST->isVolatile(), + ST->isNonTemporal(), OrigAlign, + ST->getAAInfo()); + } + + // Turn 'store undef, Ptr' -> nothing. + if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + + // Try to infer better alignment information than the store already has. + if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { + if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { + if (Align > ST->getAlignment()) { + SDValue NewStore = + DAG.getTruncStore(Chain, SDLoc(N), Value, + Ptr, ST->getPointerInfo(), ST->getMemoryVT(), + ST->isVolatile(), ST->isNonTemporal(), Align, + ST->getAAInfo()); + if (NewStore.getNode() != N) + return CombineTo(ST, NewStore, true); + } + } + } + + // Try transforming a pair floating point load / store ops to integer + // load / store ops. + if (SDValue NewST = TransformFPLoadStorePair(N)) + return NewST; + + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA + : DAG.getSubtarget().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && ST->isUnindexed()) { + // FIXME: We should do this even without AA enabled. AA will just allow + // FindBetterChain to work in more situations. The problem with this is that + // any combine that expects memory operations to be on consecutive chains + // first needs to be updated to look for users of the same chain. + + // Walk up chain skipping non-aliasing memory nodes, on this store and any + // adjacent stores. 
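+ // If a better (non-aliasing) chain is found, the store is re-chained onto
+ // it to avoid a false dependency on unrelated memory operations.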
+ if (findBetterNeighborChains(ST)) { + // replaceStoreChain uses CombineTo, which handled all of the worklist + // manipulation. Return the original node to not do anything else. + return SDValue(ST, 0); + } + } + + // Try transforming N to an indexed store. + if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) + return SDValue(N, 0); + + // FIXME: is there such a thing as a truncating indexed store? + if (ST->isTruncatingStore() && ST->isUnindexed() && + Value.getValueType().isInteger()) { + // See if we can simplify the input to this truncstore with knowledge that + // only the low bits are being used. For example: + // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" + SDValue Shorter = + GetDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits())); + AddToWorklist(Value.getNode()); + if (Shorter.getNode()) + return DAG.getTruncStore(Chain, SDLoc(N), Shorter, + Ptr, ST->getMemoryVT(), ST->getMemOperand()); + + // Otherwise, see if we can simplify the operation with + // SimplifyDemandedBits, which only works if the value has a single use. + if (SimplifyDemandedBits(Value, + APInt::getLowBitsSet( + Value.getValueType().getScalarType().getSizeInBits(), + ST->getMemoryVT().getScalarType().getSizeInBits()))) + return SDValue(N, 0); + } + + // If this is a load followed by a store to the same location, then the store + // is dead/noop. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { + if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && + ST->isUnindexed() && !ST->isVolatile() && + // There can't be any side effects between the load and store, such as + // a call or store. + Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is a store followed by a store with the same value to the same + // location, then the store is dead/noop. + if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) { + if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() && + ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() && + ST1->isUnindexed() && !ST1->isVolatile()) { + // The store is dead, remove it. + return Chain; + } + } + + // If this is an FP_ROUND or TRUNC followed by a store, fold this into a + // truncating store. We can do this even if this is already a truncstore. + if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) + && Value.getNode()->hasOneUse() && ST->isUnindexed() && + TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), + ST->getMemoryVT())) { + return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0), + Ptr, ST->getMemoryVT(), ST->getMemOperand()); + } + + // Only perform this optimization before the types are legal, because we + // don't want to perform this optimization on every DAGCombine invocation. + if (!LegalTypes) { + bool EverChanged = false; + + do { + // There can be multiple store sequences on the same chain. + // Keep trying to merge store sequences until we are unable to do so + // or until we merge the last store on the chain. 
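+ // Note that a successful merge may delete ST itself; the loop condition
+ // below checks for ISD::DELETED_NODE before retrying.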
+ bool Changed = MergeConsecutiveStores(ST); + EverChanged |= Changed; + if (!Changed) break; + } while (ST->getOpcode() != ISD::DELETED_NODE); + + if (EverChanged) + return SDValue(N, 0); + } + + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // + // Make sure to do this only after attempting to merge stores in order to + // avoid changing the types of some subset of stores due to visit order, + // preventing their merging. + if (isa<ConstantFPSDNode>(Value)) { + if (SDValue NewSt = replaceStoreOfFPConstant(ST)) + return NewSt; + } + + return ReduceLoadOpStoreWidth(N); +} + +SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { + SDValue InVec = N->getOperand(0); + SDValue InVal = N->getOperand(1); + SDValue EltNo = N->getOperand(2); + SDLoc dl(N); + + // If the inserted element is an UNDEF, just use the input vector. + if (InVal.getOpcode() == ISD::UNDEF) + return InVec; + + EVT VT = InVec.getValueType(); + + // If we can't generate a legal BUILD_VECTOR, exit + if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) + return SDValue(); + + // Check that we know which element is being inserted + if (!isa<ConstantSDNode>(EltNo)) + return SDValue(); + unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + // Canonicalize insert_vector_elt dag nodes. + // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = + cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorklist(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially + // be converted to a BUILD_VECTOR). Fill in the Ops vector with the + // vector elements. + SmallVector<SDValue, 8> Ops; + // Do not combine these two vectors if the output vector will not replace + // the input vector. + if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) { + Ops.append(InVec.getNode()->op_begin(), + InVec.getNode()->op_end()); + } else if (InVec.getOpcode() == ISD::UNDEF) { + unsigned NElts = VT.getVectorNumElements(); + Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); + } else { + return SDValue(); + } + + // Insert the element + if (Elt < Ops.size()) { + // All the operands of BUILD_VECTOR must have the same type; + // we enforce that here. + EVT OpVT = Ops[0].getValueType(); + if (InVal.getValueType() != OpVT) + InVal = OpVT.bitsGT(InVal.getValueType()) ? 
+ DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : + DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); + Ops[Elt] = InVal; + } + + // Return the new vector + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + SDLoc DL(EVE); + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + Offset = DAG.getConstant(PtrOff, DL, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType); + Offset = DAG.getNode( + ISD::MUL, DL, PtrType, Offset, + DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType)); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT, + VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad( + ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI, + VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorklistRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorklist(Load.getNode()); + AddUsersToWorklist(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. 
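+ // Returning SDValue(EVE, 0) below signals the caller that the replacement
+ // has already been wired up in place.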
+ AddToWorklist(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + +SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { + // (vextract (scalar_to_vector val, 0) -> val + SDValue InVec = N->getOperand(0); + EVT VT = InVec.getValueType(); + EVT NVT = N->getValueType(0); + + if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { + // Check if the result type doesn't match the inserted element type. A + // SCALAR_TO_VECTOR may truncate the inserted element and the + // EXTRACT_VECTOR_ELT may widen the extracted vector. + SDValue InOp = InVec.getOperand(0); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT); + } + return InOp; + } + + SDValue EltNo = N->getOperand(1); + ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo); + + // extract_vector_elt (build_vector x, y), 1 -> y + if (ConstEltNo && + InVec.getOpcode() == ISD::BUILD_VECTOR && + TLI.isTypeLegal(VT) && + (InVec.hasOneUse() || + TLI.aggressivelyPreferBuildVectorSources(VT))) { + SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue()); + EVT InEltVT = Elt.getValueType(); + + // Sometimes build_vector's scalar input types do not match result type. + if (NVT == InEltVT) + return Elt; + + // TODO: It may be useful to truncate if free if the build_vector implicitly + // converts. + } + + // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. + // We only perform this optimization before the op legalization phase because + // we may introduce new vector instructions which are not backed by TD + // patterns. For example on AVX, extracting elements from a wide vector + // without using extract_subvector. However, if we can find an underlying + // scalar value, then we can always use that. + if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) { + int NumElem = VT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); + // Find the new index to extract from. + int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue()); + + // Extracting an undef index is undef. + if (OrigElt == -1) + return DAG.getUNDEF(NVT); + + // Select the right vector half to extract from. + SDValue SVInVec; + if (OrigElt < NumElem) { + SVInVec = InVec->getOperand(0); + } else { + SVInVec = InVec->getOperand(1); + OrigElt -= NumElem; + } + + if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { + SDValue InOp = SVInVec.getOperand(OrigElt); + if (InOp.getValueType() != NVT) { + assert(InOp.getValueType().isInteger() && NVT.isInteger()); + InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); + } + + return InOp; + } + + // FIXME: We should handle recursing on other vector shuffles and + // scalar_to_vector here as well. + + if (!LegalOperations) { + EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout()); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec, + DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy)); + } + } + + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. 
+ if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode()) && + !N->getOperand(1)->hasPredecessor(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + + // Perform only after legalization to ensure build_vector / vector_shuffle + // optimizations have already been done. + if (!LegalOperations) return SDValue(); + + // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) + // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) + + if (ConstEltNo) { + int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + + LoadSDNode *LN0 = nullptr; + const ShuffleVectorSDNode *SVN = nullptr; + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && + InVec.getOperand(0).getValueType() == ExtVT && + ISD::isNormalLoad(InVec.getOperand(0).getNode())) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + LN0 = cast<LoadSDNode>(InVec.getOperand(0)); + } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { + // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) + // => + // (load $addr+1*size) + + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + // If the bit convert changed the number of elements, it is unsafe + // to examine the mask. + if (BCNumEltsChanged) + return SDValue(); + + // Select the input vector, guarding against out of range extract vector. + unsigned NumElems = VT.getVectorNumElements(); + int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); + InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + InVec = InVec.getOperand(0); + } + if (ISD::isNormalLoad(InVec.getNode())) { + LN0 = cast<LoadSDNode>(InVec); + Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType()); + } + } + + // Make sure we found a non-volatile load and the extractelement is + // the only use. + if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) + return SDValue(); + + // If Idx was -1 above, Elt is going to be -1, so just return undef. + if (Elt == -1) + return DAG.getUNDEF(LVT); + + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); + } + + return SDValue(); +} + +// Simplify (build_vec (ext )) to (bitcast (build_vec )) +SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. 
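+ // A sketch of the fold, for zero-extended i16 inputs on a little-endian
+ // target:
+ //   (v4i32 build_vector (zext a), (zext b), (zext c), (zext d))
+ //     -> (v4i32 bitcast (v8i16 build_vector a, 0, b, 0, c, 0, d, 0))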
+ // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes) + return SDValue(); + + unsigned NumInScalars = N->getNumOperands(); + SDLoc dl(N); + EVT VT = N->getValueType(0); + + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. + EVT SourceType = MVT::Other; + bool AllAnyExt = true; + + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort if the element is not an extension. + if (!ZeroExt && !AnyExt) { + SourceType = MVT::Other; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + // First time. + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + SourceType = MVT::Other; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + AllAnyExt &= AnyExt; + } + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = VT.getScalarType(); + bool ValidTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // Create a new simpler BUILD_VECTOR sequence which other optimizations can + // turn into a single shuffle instruction. + if (!ValidTypes) + return SDValue(); + + bool isLE = DAG.getDataLayout().isLittleEndian(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SDLoc(N), SourceType); + + unsigned NewBVElems = ElemRatio * VT.getVectorNumElements(); + SmallVector<SDValue, 8> Ops(NewBVElems, Filler); + + // Populate the new build_vector + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Cast = N->getOperand(i); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(SourceType); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); + assert(VecVT.getSizeInBits() == VT.getSizeInBits() && + "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops); + + // The new BUILD_VECTOR node has the potential to be further optimized. + AddToWorklist(BV.getNode()); + // Bitcast to the desired type. 
+ return DAG.getNode(ISD::BITCAST, dl, VT, BV); +} + +SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { + EVT VT = N->getValueType(0); + + unsigned NumInScalars = N->getNumOperands(); + SDLoc dl(N); + + EVT SrcVT = MVT::Other; + unsigned Opcode = ISD::DELETED_NODE; + unsigned NumDefs = 0; + + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + unsigned Opc = In.getOpcode(); + + if (Opc == ISD::UNDEF) + continue; + + // If all scalar values are floats and converted from integers. + if (Opcode == ISD::DELETED_NODE && + (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { + Opcode = Opc; + } + + if (Opc != Opcode) + return SDValue(); + + EVT InVT = In.getOperand(0).getValueType(); + + // If all scalar values are typed differently, bail out. It's chosen to + // simplify BUILD_VECTOR of integer types. + if (SrcVT == MVT::Other) + SrcVT = InVT; + if (SrcVT != InVT) + return SDValue(); + NumDefs++; + } + + // If the vector has just one element defined, it's not worth to fold it into + // a vectorized one. + if (NumDefs < 2) + return SDValue(); + + assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP) + && "Should only handle conversion from integer to float."); + assert(SrcVT != MVT::Other && "Cannot determine source type!"); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + + if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) + return SDValue(); + + // Just because the floating-point vector type is legal does not necessarily + // mean that the corresponding integer vector type is. + if (!isTypeLegal(NVT)) + return SDValue(); + + SmallVector<SDValue, 8> Opnds; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue In = N->getOperand(i); + + if (In.getOpcode() == ISD::UNDEF) + Opnds.push_back(DAG.getUNDEF(SrcVT)); + else + Opnds.push_back(In.getOperand(0)); + } + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds); + AddToWorklist(BV.getNode()); + + return DAG.getNode(Opcode, dl, VT, BV); +} + +SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { + unsigned NumInScalars = N->getNumOperands(); + SDLoc dl(N); + EVT VT = N->getValueType(0); + + // A vector built entirely of undefs is undef. + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + if (SDValue V = reduceBuildVecExtToExtBuildVec(N)) + return V; + + if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N)) + return V; + + // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT + // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from + // at most two distinct vectors, turn this into a shuffle node. + + // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. + if (!isTypeLegal(VT)) + return SDValue(); + + // May only combine to shuffle after legalize if shuffle is legal. + if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT)) + return SDValue(); + + SDValue VecIn1, VecIn2; + bool UsesZeroVector = false; + for (unsigned i = 0; i != NumInScalars; ++i) { + SDValue Op = N->getOperand(i); + // Ignore undef inputs. + if (Op.getOpcode() == ISD::UNDEF) continue; + + // See if we can combine this build_vector into a blend with a zero vector. + if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) { + UsesZeroVector = true; + continue; + } + + // If this input is something other than a EXTRACT_VECTOR_ELT with a + // constant index, bail out. 
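+ // Otherwise, e.g. (build_vector (extractelt A, 0), (extractelt A, 1),
+ //                                (extractelt B, 0), (extractelt B, 1))
+ // can be turned into (vector_shuffle A, B, <0, 1, 4, 5>) further below,
+ // assuming A and B are v4 vectors of the result's element type.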
+ if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op.getOperand(1))) { + VecIn1 = VecIn2 = SDValue(nullptr, 0); + break; + } + + // We allow up to two distinct input vectors. + SDValue ExtractedFromVec = Op.getOperand(0); + if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) + continue; + + if (!VecIn1.getNode()) { + VecIn1 = ExtractedFromVec; + } else if (!VecIn2.getNode() && !UsesZeroVector) { + VecIn2 = ExtractedFromVec; + } else { + // Too many inputs. + VecIn1 = VecIn2 = SDValue(nullptr, 0); + break; + } + } + + // If everything is good, we can make a shuffle operation. + if (VecIn1.getNode()) { + unsigned InNumElements = VecIn1.getValueType().getVectorNumElements(); + SmallVector<int, 8> Mask; + for (unsigned i = 0; i != NumInScalars; ++i) { + unsigned Opcode = N->getOperand(i).getOpcode(); + if (Opcode == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Operands can also be zero. + if (Opcode != ISD::EXTRACT_VECTOR_ELT) { + assert(UsesZeroVector && + (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) && + "Unexpected node found!"); + Mask.push_back(NumInScalars+i); + continue; + } + + // If extracting from the first vector, just use the index directly. + SDValue Extract = N->getOperand(i); + SDValue ExtVal = Extract.getOperand(1); + unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); + if (Extract.getOperand(0) == VecIn1) { + Mask.push_back(ExtIndex); + continue; + } + + // Otherwise, use InIdx + InputVecSize + Mask.push_back(InNumElements + ExtIndex); + } + + // Avoid introducing illegal shuffles with zero. + if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT)) + return SDValue(); + + // We can't generate a shuffle node with mismatched input and output types. + // Attempt to transform a single input vector to the correct type. + if ((VT != VecIn1.getValueType())) { + // If the input vector type has a different base type to the output + // vector type, bail out. + EVT VTElemType = VT.getVectorElementType(); + if ((VecIn1.getValueType().getVectorElementType() != VTElemType) || + (VecIn2.getNode() && + (VecIn2.getValueType().getVectorElementType() != VTElemType))) + return SDValue(); + + // If the input vector is too small, widen it. + // We only support widening of vectors which are half the size of the + // output registers. For example XMM->YMM widening on X86 with AVX. + EVT VecInT = VecIn1.getValueType(); + if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) { + // If we only have one small input, widen it by adding undef values. + if (!VecIn2.getNode()) + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, + DAG.getUNDEF(VecIn1.getValueType())); + else if (VecIn1.getValueType() == VecIn2.getValueType()) { + // If we have two small inputs of the same type, try to concat them. + VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2); + VecIn2 = SDValue(nullptr, 0); + } else + return SDValue(); + } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) { + // If the input vector is too large, try to split it. + // We don't support having two input vectors that are too large. + // If the zero vector was used, we can not split the vector, + // since we'd need 3 inputs. + if (UsesZeroVector || VecIn2.getNode()) + return SDValue(); + + if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements())) + return SDValue(); + + // Try to replace VecIn1 with two extract_subvectors + // No need to update the masks, they should still be correct. 
+ VecIn2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(VT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + VecIn1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } else + return SDValue(); + } + + if (UsesZeroVector) + VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) : + DAG.getConstantFP(0.0, dl, VT); + else + // If VecIn2 is unused then change it to undef. + VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); + + // Check that we were able to transform all incoming values to the same + // type. + if (VecIn2.getValueType() != VecIn1.getValueType() || + VecIn1.getValueType() != VT) + return SDValue(); + + // Return the new VECTOR_SHUFFLE node. + SDValue Ops[2]; + Ops[0] = VecIn1; + Ops[1] = VecIn2; + return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); + } + + return SDValue(); +} + +static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT OpVT = N->getOperand(0).getValueType(); + + // If the operands are legal vectors, leave them alone. + if (TLI.isTypeLegal(OpVT)) + return SDValue(); + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SmallVector<SDValue, 8> Ops; + + EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + + // Keep track of what we encounter. + bool AnyInteger = false; + bool AnyFP = false; + for (const SDValue &Op : N->ops()) { + if (ISD::BITCAST == Op.getOpcode() && + !Op.getOperand(0).getValueType().isVector()) + Ops.push_back(Op.getOperand(0)); + else if (ISD::UNDEF == Op.getOpcode()) + Ops.push_back(ScalarUndef); + else + return SDValue(); + + // Note whether we encounter an integer or floating point scalar. + // If it's neither, bail out, it could be something weird like x86mmx. + EVT LastOpVT = Ops.back().getValueType(); + if (LastOpVT.isFloatingPoint()) + AnyFP = true; + else if (LastOpVT.isInteger()) + AnyInteger = true; + else + return SDValue(); + } + + // If any of the operands is a floating point scalar bitcast to a vector, + // use floating point types throughout, and bitcast everything. + // Replace UNDEFs by another scalar UNDEF node, of the final desired type. + if (AnyFP) { + SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits()); + ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT); + if (AnyInteger) { + for (SDValue &Op : Ops) { + if (Op.getValueType() == SVT) + continue; + if (Op.getOpcode() == ISD::UNDEF) + Op = ScalarUndef; + else + Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op); + } + } + } + + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT, + VT.getSizeInBits() / SVT.getSizeInBits()); + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops)); +} + +// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR +// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at +// most two distinct vectors the same size as the result, attempt to turn this +// into a legal shuffle. +static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + EVT OpVT = N->getOperand(0).getValueType(); + int NumElts = VT.getVectorNumElements(); + int NumOpElts = OpVT.getVectorNumElements(); + + SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT); + SmallVector<int, 8> Mask; + + for (SDValue Op : N->ops()) { + // Peek through any bitcast. 
+ while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (Op.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // What vector are we extracting the subvector from and at what index? + SDValue ExtVec = Op.getOperand(0); + + // We want the EVT of the original extraction to correctly scale the + // extraction index. + EVT ExtVT = ExtVec.getValueType(); + + // Peek through any bitcast. + while (ExtVec.getOpcode() == ISD::BITCAST) + ExtVec = ExtVec.getOperand(0); + + // UNDEF nodes convert to UNDEF shuffle mask values. + if (ExtVec.getOpcode() == ISD::UNDEF) { + Mask.append((unsigned)NumOpElts, -1); + continue; + } + + if (!isa<ConstantSDNode>(Op.getOperand(1))) + return SDValue(); + int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // Ensure that we are extracting a subvector from a vector the same + // size as the result. + if (ExtVT.getSizeInBits() != VT.getSizeInBits()) + return SDValue(); + + // Scale the subvector index to account for any bitcast. + int NumExtElts = ExtVT.getVectorNumElements(); + if (0 == (NumExtElts % NumElts)) + ExtIdx /= (NumExtElts / NumElts); + else if (0 == (NumElts % NumExtElts)) + ExtIdx *= (NumElts / NumExtElts); + else + return SDValue(); + + // At most we can reference 2 inputs in the final shuffle. + if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) { + SV0 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx); + } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) { + SV1 = ExtVec; + for (int i = 0; i != NumOpElts; ++i) + Mask.push_back(i + ExtIdx + NumElts); + } else { + return SDValue(); + } + } + + if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0), + DAG.getBitcast(VT, SV1), Mask); +} + +SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { + // If we only have one input vector, we don't need to do any concatenation. + if (N->getNumOperands() == 1) + return N->getOperand(0); + + // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); + if (ISD::allOperandsUndef(N)) + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where all but the first of the vectors are undef. + if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) { + return Op.getOpcode() == ISD::UNDEF; + })) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). 
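+ // The scalar is only visible here through a bitcast of the first operand,
+ // possibly wrapped in a truncate of an illegally-typed value; both layers
+ // are peeled off below before building the SCALAR_TO_VECTOR.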
+ if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + + // If the bitcast type isn't legal, it might be a trunc of a legal type; + // look through the trunc so we can still do the transform: + // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar) + if (Scalar->getOpcode() == ISD::TRUNCATE && + !TLI.isTypeLegal(Scalar.getValueType()) && + TLI.isTypeLegal(Scalar->getOperand(0).getValueType())) + Scalar = Scalar->getOperand(0); + + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } + + // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR. + // We have already tested above for an UNDEF only concatenation. + // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...)) + // -> (BUILD_VECTOR A, B, ..., C, D, ...) + auto IsBuildVectorOrUndef = [](const SDValue &Op) { + return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode(); + }; + bool AllBuildVectorsOrUndefs = + std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef); + if (AllBuildVectorsOrUndefs) { + SmallVector<SDValue, 8> Opnds; + EVT SVT = VT.getScalarType(); + + EVT MinVT = SVT; + if (!SVT.isFloatingPoint()) { + // If BUILD_VECTOR are from built from integer, they may have different + // operand types. Get the smallest type and truncate all operands to it. + bool FoundMinVT = false; + for (const SDValue &Op : N->ops()) + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + EVT OpSVT = Op.getOperand(0)->getValueType(0); + MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT; + FoundMinVT = true; + } + assert(FoundMinVT && "Concat vector type mismatch"); + } + + for (const SDValue &Op : N->ops()) { + EVT OpVT = Op.getValueType(); + unsigned NumElts = OpVT.getVectorNumElements(); + + if (ISD::UNDEF == Op.getOpcode()) + Opnds.append(NumElts, DAG.getUNDEF(MinVT)); + + if (ISD::BUILD_VECTOR == Op.getOpcode()) { + if (SVT.isFloatingPoint()) { + assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch"); + Opnds.append(Op->op_begin(), Op->op_begin() + NumElts); + } else { + for (unsigned i = 0; i != NumElts; ++i) + Opnds.push_back( + DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i))); + } + } + } + + assert(VT.getVectorNumElements() == Opnds.size() && + "Concat vector type mismatch"); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); + } + + // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR. + if (SDValue V = combineConcatVectorOfScalars(N, DAG)) + return V; + + // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) + if (SDValue V = combineConcatVectorOfExtracts(N, DAG)) + return V; + + // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR + // nodes often generate nop CONCAT_VECTOR nodes. + // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that + // place the incoming vectors at the exact same location. 
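+ // I.e. if every operand is (extract_subvector X, i * PartNumElem) from the
+ // same X and X has the result's type, the concat just reassembles X and
+ // can be replaced by X itself.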
+ SDValue SingleSource = SDValue(); + unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue Op = N->getOperand(i); + + if (Op.getOpcode() == ISD::UNDEF) + continue; + + // Check if this is the identity extract: + if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + // Find the single incoming vector for the extract_subvector. + if (SingleSource.getNode()) { + if (Op.getOperand(0) != SingleSource) + return SDValue(); + } else { + SingleSource = Op.getOperand(0); + + // Check the source type is the same as the type of the result. + // If not, this concat may extend the vector, so we can not + // optimize it away. + if (SingleSource.getValueType() != N->getValueType(0)) + return SDValue(); + } + + unsigned IdentityIndex = i * PartNumElem; + ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + // The extract index must be constant. + if (!CS) + return SDValue(); + + // Check that we are reading from the identity index. + if (CS->getZExtValue() != IdentityIndex) + return SDValue(); + } + + if (SingleSource.getNode()) + return SingleSource; + + return SDValue(); +} + +SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { + EVT NVT = N->getValueType(0); + SDValue V = N->getOperand(0); + + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same + // type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = N->getConstantOperandVal(1); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + SDLoc dl(N); + // Handle only simple case where vector being inserted and vector + // being extracted are of same type, and are half size of larger vectors. + EVT BigVT = V->getOperand(0).getValueType(); + EVT SmallVT = V->getOperand(1).getValueType(); + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + return SDValue(); + + // Only handle cases where both indexes are constants with the same type. 
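+ // If the bit offsets of the insert and the extract coincide, the extract
+ // simply yields the inserted half (via a bitcast); otherwise it is
+ // redirected at the original wide vector below.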
+ ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); + ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); + + if (InsIdx && ExtIdx && + InsIdx->getValueType(0).getSizeInBits() <= 64 && + ExtIdx->getValueType(0).getSizeInBits() <= 64) { + // Combine: + // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) + // Into: + // indices are equal or bit offsets are equal => V1 + // otherwise => (extract_subvec V1, ExtIdx) + if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getNode(ISD::BITCAST, dl, + N->getOperand(0).getValueType(), + V->getOperand(0)), N->getOperand(1)); + } + } + + return SDValue(); +} + +static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements, + SDValue V, SelectionDAG &DAG) { + SDLoc DL(V); + EVT VT = V.getValueType(); + + switch (V.getOpcode()) { + default: + return V; + + case ISD::CONCAT_VECTORS: { + EVT OpVT = V->getOperand(0).getValueType(); + int OpSize = OpVT.getVectorNumElements(); + SmallBitVector OpUsedElements(OpSize, false); + bool FoundSimplification = false; + SmallVector<SDValue, 4> NewOps; + NewOps.reserve(V->getNumOperands()); + for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) { + SDValue Op = V->getOperand(i); + bool OpUsed = false; + for (int j = 0; j < OpSize; ++j) + if (UsedElements[i * OpSize + j]) { + OpUsedElements[j] = true; + OpUsed = true; + } + NewOps.push_back( + OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG) + : DAG.getUNDEF(OpVT)); + FoundSimplification |= Op == NewOps.back(); + OpUsedElements.reset(); + } + if (FoundSimplification) + V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps); + return V; + } + + case ISD::INSERT_SUBVECTOR: { + SDValue BaseV = V->getOperand(0); + SDValue SubV = V->getOperand(1); + auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2)); + if (!IdxN) + return V; + + int SubSize = SubV.getValueType().getVectorNumElements(); + int Idx = IdxN->getZExtValue(); + bool SubVectorUsed = false; + SmallBitVector SubUsedElements(SubSize, false); + for (int i = 0; i < SubSize; ++i) + if (UsedElements[i + Idx]) { + SubVectorUsed = true; + SubUsedElements[i] = true; + UsedElements[i + Idx] = false; + } + + // Now recurse on both the base and sub vectors. + SDValue SimplifiedSubV = + SubVectorUsed + ? 
simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG) + : DAG.getUNDEF(SubV.getValueType()); + SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG); + if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV) + V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, + SimplifiedBaseV, SimplifiedSubV, V->getOperand(2)); + return V; + } + } +} + +static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0, + SDValue N1, SelectionDAG &DAG) { + EVT VT = SVN->getValueType(0); + int NumElts = VT.getVectorNumElements(); + SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false); + for (int M : SVN->getMask()) + if (M >= 0 && M < NumElts) + N0UsedElements[M] = true; + else if (M >= NumElts) + N1UsedElements[M - NumElts] = true; + + SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG); + SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG); + if (S0 == N0 && S1 == N1) + return SDValue(); + + return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask()); +} + +// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, +// or turn a shuffle of a single concat into simpler shuffle then concat. +static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + SmallVector<SDValue, 4> Ops; + EVT ConcatVT = N0.getOperand(0).getValueType(); + unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements(); + unsigned NumConcats = NumElts / NumElemsPerConcat; + + // Special case: shuffle(concat(A,B)) can be more efficiently represented + // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high + // half vector elements. + if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF && + std::all_of(SVN->getMask().begin() + NumElemsPerConcat, + SVN->getMask().end(), [](int i) { return i == -1; })) { + N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1), + makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat)); + N1 = DAG.getUNDEF(ConcatVT); + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1); + } + + // Look at every vector that's inserted. We're looking for exact + // subvector-sized copies from a concatenated vector + for (unsigned I = 0; I != NumConcats; ++I) { + // Make sure we're dealing with a copy. + unsigned Begin = I * NumElemsPerConcat; + bool AllUndef = true, NoUndef = true; + for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) { + if (SVN->getMaskElt(J) >= 0) + AllUndef = false; + else + NoUndef = false; + } + + if (NoUndef) { + if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0) + return SDValue(); + + for (unsigned J = 1; J != NumElemsPerConcat; ++J) + if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J)) + return SDValue(); + + unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat; + if (FirstElt < N0.getNumOperands()) + Ops.push_back(N0.getOperand(FirstElt)); + else + Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands())); + + } else if (AllUndef) { + Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType())); + } else { // Mixed with general masks and undefs, can't do optimization. 
+ return SDValue(); + } + } + + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops); +} + +SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); + + // Canonicalize shuffle undef, undef -> undef + if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(VT); + + ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + + // Canonicalize shuffle v, v -> v, undef + if (N0 == N1) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) Idx -= NumElts; + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N0.getOpcode() == ISD::UNDEF) { + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= 0) { + if (Idx >= (int)NumElts) + Idx -= NumElts; + else + Idx = -1; // remove reference to lhs + } + NewMask.push_back(Idx); + } + return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT), + &NewMask[0]); + } + + // Remove references to rhs if it is undef + if (N1.getOpcode() == ISD::UNDEF) { + bool Changed = false; + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx >= (int)NumElts) { + Idx = -1; + Changed = true; + } + NewMask.push_back(Idx); + } + if (Changed) + return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]); + } + + // If it is a splat, check if the argument vector is another splat or a + // build_vector. + if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { + SDNode *V = N0.getNode(); + + // If this is a bit convert that changes the element type of the vector but + // not the number of vector elements, look through it. Be careful not to + // look though conversions that change things like v4f32 to v2f64. + if (V->getOpcode() == ISD::BITCAST) { + SDValue ConvInput = V->getOperand(0); + if (ConvInput.getValueType().isVector() && + ConvInput.getValueType().getVectorNumElements() == NumElts) + V = ConvInput.getNode(); + } + + if (V->getOpcode() == ISD::BUILD_VECTOR) { + assert(V->getNumOperands() == NumElts && + "BUILD_VECTOR has wrong number of operands"); + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + } + // Splat of <u, u, u, u>, return <u, u, u, u> + if (!Base.getNode()) + return N0; + for (unsigned i = 0; i != NumElts; ++i) { + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + } + // Splat of <x, x, x, x>, return <x, x, x, x> + if (AllSame) + return N0; + + // Canonicalize any other splat as a build_vector. + const SDValue &Splatted = V->getOperand(SVN->getSplatIndex()); + SmallVector<SDValue, 8> Ops(NumElts, Splatted); + SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), + V->getValueType(0), Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. 
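+ // E.g. a v4i32 splat of lane 2 of (bitcast (v4f32 build_vector a, b, c, d))
+ // is rebuilt as (bitcast (v4f32 build_vector c, c, c, c)).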
+ if (V->getValueType(0) != VT) + NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV); + return NewBV; + } + } + + // There are various patterns used to build up a vector from smaller vectors, + // subvectors, or elements. Scan chains of these and replace unused insertions + // or components with undef. + if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG)) + return S; + + if (N0.getOpcode() == ISD::CONCAT_VECTORS && + Level < AfterLegalizeVectorOps && + (N1.getOpcode() == ISD::UNDEF || + (N1.getOpcode() == ISD::CONCAT_VECTORS && + N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) { + SDValue V = partitionShuffleOfConcats(N, DAG); + + if (V.getNode()) + return V; + } + + // Attempt to combine a shuffle of 2 inputs of 'scalar sources' - + // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR. + if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) { + SmallVector<SDValue, 8> Ops; + for (int M : SVN->getMask()) { + SDValue Op = DAG.getUNDEF(VT.getScalarType()); + if (M >= 0) { + int Idx = M % NumElts; + SDValue &S = (M < (int)NumElts ? N0 : N1); + if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) { + Op = S.getOperand(Idx); + } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) { + if (Idx == 0) + Op = S.getOperand(0); + } else { + // Operand can't be combined - bail out. + break; + } + } + Ops.push_back(Op); + } + if (Ops.size() == VT.getVectorNumElements()) { + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + EVT SVT = VT.getScalarType(); + if (SVT.isInteger()) + for (SDValue &Op : Ops) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + if (SVT != VT.getScalarType()) + for (SDValue &Op : Ops) + Op = TLI.isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops); + } + } + + // If this shuffle only has a single input that is a bitcasted shuffle, + // attempt to merge the 2 shuffles and suitably bitcast the inputs/output + // back to their original types. + if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps && + TLI.isTypeLegal(VT)) { + + // Peek through the bitcast only if there is one user. + SDValue BC0 = N0; + while (BC0.getOpcode() == ISD::BITCAST) { + if (!BC0.hasOneUse()) + break; + BC0 = BC0.getOperand(0); + } + + auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) { + if (Scale == 1) + return SmallVector<int, 8>(Mask.begin(), Mask.end()); + + SmallVector<int, 8> NewMask; + for (int M : Mask) + for (int s = 0; s != Scale; ++s) + NewMask.push_back(M < 0 ? -1 : Scale * M + s); + return NewMask; + }; + + if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) { + EVT SVT = VT.getScalarType(); + EVT InnerVT = BC0->getValueType(0); + EVT InnerSVT = InnerVT.getScalarType(); + + // Determine which shuffle works with the smaller scalar type. + EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT; + EVT ScaleSVT = ScaleVT.getScalarType(); + + if (TLI.isTypeLegal(ScaleVT) && + 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) && + 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) { + + int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits(); + + // Scale the shuffle masks to the smaller scalar type. 
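+ // E.g. scaling the mask <1, 0> by 2 gives <2, 3, 0, 1>: the same byte
+ // permutation expressed in terms of the narrower elements.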
+ ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0); + SmallVector<int, 8> InnerMask = + ScaleShuffleMask(InnerSVN->getMask(), InnerScale); + SmallVector<int, 8> OuterMask = + ScaleShuffleMask(SVN->getMask(), OuterScale); + + // Merge the shuffle masks. + SmallVector<int, 8> NewMask; + for (int M : OuterMask) + NewMask.push_back(M < 0 ? -1 : InnerMask[M]); + + // Test for shuffle mask legality over both commutations. + SDValue SV0 = BC0->getOperand(0); + SDValue SV1 = BC0->getOperand(1); + bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + if (!LegalMask) { + std::swap(SV0, SV1); + ShuffleVectorSDNode::commuteMask(NewMask); + LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT); + } + + if (LegalMask) { + SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0); + SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1); + return DAG.getNode( + ISD::BITCAST, SDLoc(N), VT, + DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask)); + } + } + } + } + + // Canonicalize shuffles according to rules: + // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A) + // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B) + // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B) + if (N1.getOpcode() == ISD::VECTOR_SHUFFLE && + N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && + TLI.isTypeLegal(VT)) { + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(N1->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0 = N1->getOperand(0); + SDValue SV1 = N1->getOperand(1); + bool HasSameOp0 = N0 == SV0; + bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF; + if (HasSameOp0 || IsSV1Undef || N0 == SV1) + // Commute the operands of this shuffle so that next rule + // will trigger. + return DAG.getCommutedVectorShuffle(*SVN); + } + + // Try to fold according to rules: + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + // Don't try to fold shuffles with illegal type. + // Only fold if this shuffle is the only user of the other shuffle. + if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) && + Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) { + ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); + + // The incoming shuffle must be of the same type as the result of the + // current shuffle. + assert(OtherSV->getOperand(0).getValueType() == VT && + "Shuffle types don't match"); + + SDValue SV0, SV1; + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask for a shuffle with SV0 as the first + // operand, and SV1 as the second operand. + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = SVN->getMaskElt(i); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + SDValue CurrentVec; + if (Idx < (int)NumElts) { + // This shuffle index refers to the inner shuffle N0. Lookup the inner + // shuffle mask to identify which vector is actually referenced. + Idx = OtherSV->getMaskElt(Idx); + if (Idx < 0) { + // Propagate Undef. + Mask.push_back(Idx); + continue; + } + + CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0) + : OtherSV->getOperand(1); + } else { + // This shuffle index references an element within N1. + CurrentVec = N1; + } + + // Simple case where 'CurrentVec' is UNDEF. 
+ if (CurrentVec.getOpcode() == ISD::UNDEF) { + Mask.push_back(-1); + continue; + } + + // Canonicalize the shuffle index. We don't know yet if CurrentVec + // will be the first or second operand of the combined shuffle. + Idx = Idx % NumElts; + if (!SV0.getNode() || SV0 == CurrentVec) { + // Ok. CurrentVec is the left hand side. + // Update the mask accordingly. + SV0 = CurrentVec; + Mask.push_back(Idx); + continue; + } + + // Bail out if we cannot convert the shuffle pair into a single shuffle. + if (SV1.getNode() && SV1 != CurrentVec) + return SDValue(); + + // Ok. CurrentVec is the right hand side. + // Update the mask accordingly. + SV1 = CurrentVec; + Mask.push_back(Idx + NumElts); + } + + // Check if all indices in Mask are Undef. In case, propagate Undef. + bool isUndefMask = true; + for (unsigned i = 0; i != NumElts && isUndefMask; ++i) + isUndefMask &= Mask[i] < 0; + + if (isUndefMask) + return DAG.getUNDEF(VT); + + if (!SV0.getNode()) + SV0 = DAG.getUNDEF(VT); + if (!SV1.getNode()) + SV1 = DAG.getUNDEF(VT); + + // Avoid introducing shuffles with illegal mask. + if (!TLI.isShuffleMaskLegal(Mask, VT)) { + ShuffleVectorSDNode::commuteMask(Mask); + + if (!TLI.isShuffleMaskLegal(Mask, VT)) + return SDValue(); + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2) + std::swap(SV0, SV1); + } + + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2) + // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2) + return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) { + SDValue InVal = N->getOperand(0); + EVT VT = N->getValueType(0); + + // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern + // with a VECTOR_SHUFFLE. + if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + SDValue InVec = InVal->getOperand(0); + SDValue EltNo = InVal->getOperand(1); + + // FIXME: We could support implicit truncation if the shuffle can be + // scaled to a smaller vector scalar type. + ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo); + if (C0 && VT == InVec.getValueType() && + VT.getScalarType() == InVal.getValueType()) { + SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1); + int Elt = C0->getZExtValue(); + NewMask[0] = Elt; + + if (TLI.isShuffleMaskLegal(NewMask, VT)) + return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT), + NewMask); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N2 = N->getOperand(2); + + // If the input vector is a concatenation, and the insert replaces + // one of the halves, we can optimize into a single concat_vectors. 
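+  // For example (types illustrative), with v8i32 = concat_vectors v4i32:X,
+  // v4i32:Y, inserting v4i32:Z at index 0 gives concat_vectors Z, Y and
+  // inserting it at index 4 gives concat_vectors X, Z.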
+ if (N0.getOpcode() == ISD::CONCAT_VECTORS && + N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { + APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); + EVT VT = N->getValueType(0); + + // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors Z, Y) + if (InsIdx == 0) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N->getOperand(1), N0.getOperand(1)); + + // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> + // (concat_vectors X, Z) + if (InsIdx == VT.getVectorNumElements()/2) + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, + N0.getOperand(0), N->getOperand(1)); + } + + return SDValue(); +} + +SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold (fp_to_fp16 (fp16_to_fp op)) -> op + if (N0->getOpcode() == ISD::FP16_TO_FP) + return N0->getOperand(0); + + return SDValue(); +} + +SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) { + SDValue N0 = N->getOperand(0); + + // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op) + if (N0->getOpcode() == ISD::AND) { + ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1)); + if (AndConst && AndConst->getAPIntValue() == 0xffff) { + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0), + N0.getOperand(0)); + } + } + + return SDValue(); +} + +/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle +/// with the destination vector and a zero vector. +/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> +/// vector_shuffle V, Zero, <0, 4, 2, 4> +SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc dl(N); + + // Make sure we're not running after operation legalization where it + // may have custom lowered the vector shuffles. + if (LegalOperations) + return SDValue(); + + if (N->getOpcode() != ISD::AND) + return SDValue(); + + if (RHS.getOpcode() == ISD::BITCAST) + RHS = RHS.getOperand(0); + + if (RHS.getOpcode() != ISD::BUILD_VECTOR) + return SDValue(); + + EVT RVT = RHS.getValueType(); + unsigned NumElts = RHS.getNumOperands(); + + // Attempt to create a valid clear mask, splitting the mask into + // sub elements and checking to see if each is + // all zeros or all ones - suitable for shuffle masking. + auto BuildClearMask = [&](int Split) { + int NumSubElts = NumElts * Split; + int NumSubBits = RVT.getScalarSizeInBits() / Split; + + SmallVector<int, 8> Indices; + for (int i = 0; i != NumSubElts; ++i) { + int EltIdx = i / Split; + int SubIdx = i % Split; + SDValue Elt = RHS.getOperand(EltIdx); + if (Elt.getOpcode() == ISD::UNDEF) { + Indices.push_back(-1); + continue; + } + + APInt Bits; + if (isa<ConstantSDNode>(Elt)) + Bits = cast<ConstantSDNode>(Elt)->getAPIntValue(); + else if (isa<ConstantFPSDNode>(Elt)) + Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt(); + else + return SDValue(); + + // Extract the sub element from the constant bit mask. + if (DAG.getDataLayout().isBigEndian()) { + Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits); + } else { + Bits = Bits.lshr(SubIdx * NumSubBits); + } + + if (Split > 1) + Bits = Bits.trunc(NumSubBits); + + if (Bits.isAllOnesValue()) + Indices.push_back(i); + else if (Bits == 0) + Indices.push_back(i + NumSubElts); + else + return SDValue(); + } + + // Let's see if the target supports this vector_shuffle. 
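+    // The candidate is a shuffle of the (bitcast) LHS with an all-zero vector
+    // of ClearVT using the mask built above, so defer to the target's
+    // isVectorClearMaskLegal hook before creating it.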
+ EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits); + EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts); + if (!TLI.isVectorClearMaskLegal(Indices, ClearVT)) + return SDValue(); + + SDValue Zero = DAG.getConstant(0, dl, ClearVT); + return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl, + DAG.getBitcast(ClearVT, LHS), + Zero, &Indices[0])); + }; + + // Determine maximum split level (byte level masking). + int MaxSplit = 1; + if (RVT.getScalarSizeInBits() % 8 == 0) + MaxSplit = RVT.getScalarSizeInBits() / 8; + + for (int Split = 1; Split <= MaxSplit; ++Split) + if (RVT.getScalarSizeInBits() % Split == 0) + if (SDValue S = BuildClearMask(Split)) + return S; + + return SDValue(); +} + +/// Visit a binary vector operation, like ADD. +SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { + assert(N->getValueType(0).isVector() && + "SimplifyVBinOp only works on vectors!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Ops[] = {LHS, RHS}; + + // See if we can constant fold the vector operation. + if (SDValue Fold = DAG.FoldConstantVectorArithmetic( + N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags())) + return Fold; + + // Try to convert a constant mask AND into a shuffle clear mask. + if (SDValue Shuffle = XformToShuffleWithZero(N)) + return Shuffle; + + // Type legalization might introduce new shuffles in the DAG. + // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) + // -> (shuffle (VBinOp (A, B)), Undef, Mask). + if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && + isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::UNDEF && + RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); + + if (SVN0->getMask().equals(SVN1->getMask())) { + EVT VT = N->getValueType(0); + SDValue UndefVector = LHS.getOperand(1); + SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + LHS.getOperand(0), RHS.getOperand(0), + N->getFlags()); + AddUsersToWorklist(N); + return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, + &SVN0->getMask()[0]); + } + } + + return SDValue(); +} + +SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, + SDValue N1, SDValue N2){ + assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); + + SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, + cast<CondCodeSDNode>(N0.getOperand(2))->get()); + + // If we got a simplified select_cc node back from SimplifySelectCC, then + // break it down into a new SETCC node, and a new SELECT node, and then return + // the SELECT node, since we were called with a SELECT node. + if (SCC.getNode()) { + // Check to see if we got a select_cc back (to turn into setcc/select). + // Otherwise, just return whatever node we got back, like fabs. + if (SCC.getOpcode() == ISD::SELECT_CC) { + SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), + N0.getValueType(), + SCC.getOperand(0), SCC.getOperand(1), + SCC.getOperand(4)); + AddToWorklist(SETCC.getNode()); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC, + SCC.getOperand(2), SCC.getOperand(3)); + } + + return SCC; + } + return SDValue(); +} + +/// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values +/// being selected between, see if we can simplify the select. 
Callers of this +/// should assume that TheSelect is deleted if this returns true. As such, they +/// should return the appropriate thing (e.g. the node) back to the top-level of +/// the DAG combiner loop to avoid it being looked at. +bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, + SDValue RHS) { + + // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + // The select + setcc is redundant, because fsqrt returns NaN for X < -0. + if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) { + if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) { + // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?)) + SDValue Sqrt = RHS; + ISD::CondCode CC; + SDValue CmpLHS; + const ConstantFPSDNode *NegZero = nullptr; + + if (TheSelect->getOpcode() == ISD::SELECT_CC) { + CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get(); + CmpLHS = TheSelect->getOperand(0); + NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1)); + } else { + // SELECT or VSELECT + SDValue Cmp = TheSelect->getOperand(0); + if (Cmp.getOpcode() == ISD::SETCC) { + CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get(); + CmpLHS = Cmp.getOperand(0); + NegZero = isConstOrConstSplatFP(Cmp.getOperand(1)); + } + } + if (NegZero && NegZero->isNegative() && NegZero->isZero() && + Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT || + CC == ISD::SETULT || CC == ISD::SETLT)) { + // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x)) + CombineTo(TheSelect, Sqrt); + return true; + } + } + } + // Cannot simplify select with vector condition + if (TheSelect->getOperand(0).getValueType().isVector()) return false; + + // If this is a select from two identical things, try to pull the operation + // through the select. + if (LHS.getOpcode() != RHS.getOpcode() || + !LHS.hasOneUse() || !RHS.hasOneUse()) + return false; + + // If this is a load and the token chain is identical, replace the select + // of two loads with a load through a select of the address to load from. + // This triggers in things like "select bool X, 10.0, 123.0" after the FP + // constants have been dropped into the constant pool. + if (LHS.getOpcode() == ISD::LOAD) { + LoadSDNode *LLD = cast<LoadSDNode>(LHS); + LoadSDNode *RLD = cast<LoadSDNode>(RHS); + + // Token chains must be identical. + if (LHS.getOperand(0) != RHS.getOperand(0) || + // Do not let this transformation reduce the number of volatile loads. + LLD->isVolatile() || RLD->isVolatile() || + // FIXME: If either is a pre/post inc/dec load, + // we'd need to split out the address adjustment. + LLD->isIndexed() || RLD->isIndexed() || + // If this is an EXTLOAD, the VT's must match. + LLD->getMemoryVT() != RLD->getMemoryVT() || + // If this is an EXTLOAD, the kind of extension must match. + (LLD->getExtensionType() != RLD->getExtensionType() && + // The only exception is if one of the extensions is anyext. + LLD->getExtensionType() != ISD::EXTLOAD && + RLD->getExtensionType() != ISD::EXTLOAD) || + // FIXME: this discards src value information. This is + // over-conservative. It would be beneficial to be able to remember + // both potential memory locations. Since we are discarding + // src value info, don't do the transformation if the memory + // locations are not in the default address space. + LLD->getPointerInfo().getAddrSpace() != 0 || + RLD->getPointerInfo().getAddrSpace() != 0 || + !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), + LLD->getBasePtr().getValueType())) + return false; + + // Check that the select condition doesn't reach either load. 
If so, + // folding this will induce a cycle into the DAG. If not, this is safe to + // xform, so create a select of the addresses. + SDValue Addr; + if (TheSelect->getOpcode() == ISD::SELECT) { + SDNode *CondNode = TheSelect->getOperand(0).getNode(); + if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || + (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) + return false; + // The loads must not depend on one another. + if (LLD->isPredecessorOf(RLD) || + RLD->isPredecessorOf(LLD)) + return false; + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); + } else { // Otherwise SELECT_CC + SDNode *CondLHS = TheSelect->getOperand(0).getNode(); + SDNode *CondRHS = TheSelect->getOperand(1).getNode(); + + if ((LLD->hasAnyUseOfValue(1) && + (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || + (RLD->hasAnyUseOfValue(1) && + (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) + return false; + + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), + TheSelect->getOperand(1), + LLD->getBasePtr(), RLD->getBasePtr(), + TheSelect->getOperand(4)); + } + + SDValue Load; + // It is safe to replace the two loads if they have different alignments, + // but the new load must be the minimum (most restrictive) alignment of the + // inputs. + bool isInvariant = LLD->isInvariant() & RLD->isInvariant(); + unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment()); + if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { + Load = DAG.getLoad(TheSelect->getValueType(0), + SDLoc(TheSelect), + // FIXME: Discards pointer and AA info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->isVolatile(), LLD->isNonTemporal(), + isInvariant, Alignment); + } else { + Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? + RLD->getExtensionType() : LLD->getExtensionType(), + SDLoc(TheSelect), + TheSelect->getValueType(0), + // FIXME: Discards pointer and AA info. + LLD->getChain(), Addr, MachinePointerInfo(), + LLD->getMemoryVT(), LLD->isVolatile(), + LLD->isNonTemporal(), isInvariant, Alignment); + } + + // Users of the select now use the result of the load. + CombineTo(TheSelect, Load); + + // Users of the old loads now use the new load's chain. We know the + // old-load value is dead now. + CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); + CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); + return true; + } + + return false; +} + +/// Simplify an expression of the form (N0 cond N1) ? N2 : N3 +/// where 'cond' is the comparison specified by CC. +SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, + SDValue N2, SDValue N3, + ISD::CondCode CC, bool NotExtCompare) { + // (x ? y : y) -> y. + if (N2 == N3) return N2; + + EVT VT = N2.getValueType(); + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); + + // Determine if the condition we're dealing with is constant + SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), + N0, N1, CC, DL, false); + if (SCC.getNode()) AddToWorklist(SCC.getNode()); + + if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { + // fold select_cc true, x, y -> x + // fold select_cc false, x, y -> y + return !SCCC->isNullValue() ? 
N2 : N3; + } + + // Check to see if we can simplify the select into an fabs node + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { + // Allow either -0.0 or 0.0 + if (CFP->isZero()) { + // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs + if ((CC == ISD::SETGE || CC == ISD::SETGT) && + N0 == N2 && N3.getOpcode() == ISD::FNEG && + N2 == N3.getOperand(0)) + return DAG.getNode(ISD::FABS, DL, VT, N0); + + // select (setl[te] X, +/-0.0), fneg(X), X -> fabs + if ((CC == ISD::SETLT || CC == ISD::SETLE) && + N0 == N3 && N2.getOpcode() == ISD::FNEG && + N2.getOperand(0) == N3) + return DAG.getNode(ISD::FABS, DL, VT, N3); + } + } + + // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" + // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 + // in it. This is a win when the constant is not otherwise available because + // it replaces two constant pool loads with one. We only do this if the FP + // type is known to be legal, because if it isn't, then we are before legalize + // types an we want the other legalization to happen first (e.g. to avoid + // messing with soft float) and if the ConstantFP is not legal, because if + // it is legal, we may not need to store the FP constant in a constant pool. + if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) + if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { + if (TLI.isTypeLegal(N2.getValueType()) && + (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != + TargetLowering::Legal && + !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && + !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && + // If both constants have multiple uses, then we won't need to do an + // extra load, they are likely around in registers for other users. + (TV->hasOneUse() || FV->hasOneUse())) { + Constant *Elts[] = { + const_cast<ConstantFP*>(FV->getConstantFPValue()), + const_cast<ConstantFP*>(TV->getConstantFPValue()) + }; + Type *FPTy = Elts[0]->getType(); + const DataLayout &TD = DAG.getDataLayout(); + + // Create a ConstantArray of the two constants. + Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); + SDValue CPIdx = + DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()), + TD.getPrefTypeAlignment(FPTy)); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + + // Get the offsets to the 0 and 1 element of the array so that we can + // select between them. + SDValue Zero = DAG.getIntPtrConstant(0, DL); + unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); + SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV)); + + SDValue Cond = DAG.getSetCC(DL, + getSetCCResultType(N0.getValueType()), + N0, N1, CC); + AddToWorklist(Cond.getNode()); + SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), + Cond, One, Zero); + AddToWorklist(CstOffset.getNode()); + CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, + CstOffset); + AddToWorklist(CPIdx.getNode()); + return DAG.getLoad( + TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + } + } + + // Check to see if we can perform the "gzip trick", transforming + // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) + if (isNullConstant(N3) && CC == ISD::SETLT && + (isNullConstant(N1) || // (a < 0) ? b : 0 + (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? 
a : 0 + EVT XType = N0.getValueType(); + EVT AType = N2.getValueType(); + if (XType.bitsGE(AType)) { + // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a + // single-bit constant. + if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) { + unsigned ShCtV = N2C->getAPIntValue().logBase2(); + ShCtV = XType.getSizeInBits() - ShCtV - 1; + SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0), + getShiftAmountTy(N0.getValueType())); + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), + XType, N0, ShCt); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + + SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), + XType, N0, + DAG.getConstant(XType.getSizeInBits() - 1, + SDLoc(N0), + getShiftAmountTy(N0.getValueType()))); + AddToWorklist(Shift.getNode()); + + if (XType.bitsGT(AType)) { + Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); + AddToWorklist(Shift.getNode()); + } + + return DAG.getNode(ISD::AND, DL, AType, Shift, N2); + } + } + + // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) + // where y is has a single bit set. + // A plaintext description would be, we can turn the SELECT_CC into an AND + // when the condition can be materialized as an all-ones register. Any + // single bit-test can be materialized as an all-ones register with + // shift-left and shift-right-arith. + if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && + N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) { + SDValue AndLHS = N0->getOperand(0); + ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); + if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { + // Shift the tested bit over the sign bit. + APInt AndMask = ConstAndRHS->getAPIntValue(); + SDValue ShlAmt = + DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS), + getShiftAmountTy(AndLHS.getValueType())); + SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); + + // Now arithmetic right shift it all the way over, so the result is either + // all-ones, or zero. + SDValue ShrAmt = + DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl), + getShiftAmountTy(Shl.getValueType())); + SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); + + return DAG.getNode(ISD::AND, DL, VT, Shr, N3); + } + } + + // fold select C, 16, 0 -> shl C, 4 + if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() && + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { + + // If the caller doesn't want us to simplify this into a zext of a compare, + // don't do it. + if (NotExtCompare && N2C->isOne()) + return SDValue(); + + // Get a SetCC of the condition + // NOTE: Don't create a SETCC if it's not legal on this target. 
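+    // With ZeroOrOneBooleanContent the setcc result is 0 or 1, so zero
+    // extending it and shifting left by log2(N2C) (4 for the "select C, 16, 0"
+    // case above) yields N2C when the condition is true and 0 otherwise.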
+ if (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) { + SDValue Temp, SCC; + // cast from setcc result type to select result type + if (LegalTypes) { + SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), + N0, N1, CC); + if (N2.getValueType().bitsLT(SCC.getValueType())) + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), + N2.getValueType()); + else + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), + N2.getValueType(), SCC); + } else { + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), + N2.getValueType(), SCC); + } + + AddToWorklist(SCC.getNode()); + AddToWorklist(Temp.getNode()); + + if (N2C->isOne()) + return Temp; + + // shl setcc result by log2 n2c + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp), + getShiftAmountTy(Temp.getValueType()))); + } + } + + // Check to see if this is an integer abs. + // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> + // Y = sra (X, size(X)-1); xor (add (X, Y), Y) + if (N1C) { + ConstantSDNode *SubC = nullptr; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); + + EVT XType = N0.getValueType(); + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDLoc DL(N0); + SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, + N0, + DAG.getConstant(XType.getSizeInBits() - 1, DL, + getShiftAmountTy(N0.getValueType()))); + SDValue Add = DAG.getNode(ISD::ADD, DL, + XType, N0, Shift); + AddToWorklist(Shift.getNode()); + AddToWorklist(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); + } + } + + return SDValue(); +} + +/// This is a stub for TargetLowering::SimplifySetCC. +SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, + SDValue N1, ISD::CondCode Cond, + SDLoc DL, bool foldBooleans) { + TargetLowering::DAGCombinerInfo + DagCombineInfo(DAG, Level, false, this); + return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); +} + +/// Given an ISD::SDIV node expressing a divide by constant, return +/// a DAG expression to select that will generate the same value by multiplying +/// by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue DAGCombiner::BuildSDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (C->isNullValue()) + return SDValue(); + + std::vector<SDNode*> Built; + SDValue S = + TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +/// Given an ISD::SDIV node expressing a divide by constant power of 2, return a +/// DAG expression that will generate the same value by right shifting. +SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. 
+ if (C->isNullValue()) + return SDValue(); + + std::vector<SDNode *> Built; + SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +/// Given an ISD::UDIV node expressing a divide by constant, return a DAG +/// expression that will generate the same value by multiplying by a magic +/// number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue DAGCombiner::BuildUDIV(SDNode *N) { + ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); + if (!C) + return SDValue(); + + // Avoid division by zero. + if (C->isNullValue()) + return SDValue(); + + std::vector<SDNode*> Built; + SDValue S = + TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); + + for (SDNode *N : Built) + AddToWorklist(N); + return S; +} + +SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + + unsigned Iterations = 0; + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { + if (Iterations) { + // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) + // For the reciprocal, we need to find the zero of the function: + // F(X) = A X - 1 [which has a zero at X = 1/A] + // => + // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form + // does not require additional intermediate precision] + EVT VT = Op.getValueType(); + SDLoc DL(Op); + SDValue FPOne = DAG.getConstantFP(1.0, DL, VT); + + AddToWorklist(Est.getNode()); + + // Newton iterations: Est = Est + Est (1 - Arg * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags); + AddToWorklist(Est.getNode()); + } + } + return Est; + } + + return SDValue(); +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = X_i (1.5 - A X_i^2 / 2) +/// As a result, we precompute A/2 prior to the iteration loop. +SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT); + + // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that + // this entire sequence requires only one FP constant. 
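+  // Reusing ThreeHalves here means a separate 0.5 constant never has to be
+  // materialized: HalfArg = 1.5 * Arg - Arg == 0.5 * Arg.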
+ SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags); + AddToWorklist(HalfArg.getNode()); + + HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags); + AddToWorklist(HalfArg.getNode()); + + // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags); + AddToWorklist(NewEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags); + AddToWorklist(Est.getNode()); + } + return Est; +} + +/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) +/// For the reciprocal sqrt, we need to find the zero of the function: +/// F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] +/// => +/// X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0)) +SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est, + unsigned Iterations, + SDNodeFlags *Flags) { + EVT VT = Arg.getValueType(); + SDLoc DL(Arg); + SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT); + SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT); + + // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est) + for (unsigned i = 0; i < Iterations; ++i) { + SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags); + AddToWorklist(HalfEst.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags); + AddToWorklist(Est.getNode()); + + Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags); + AddToWorklist(Est.getNode()); + } + return Est; +} + +SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) { + if (Level >= AfterLegalizeDAG) + return SDValue(); + + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + unsigned Iterations = 0; + bool UseOneConstNR = false; + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { + AddToWorklist(Est.getNode()); + if (Iterations) { + Est = UseOneConstNR ? + BuildRsqrtNROneConst(Op, Est, Iterations, Flags) : + BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags); + } + return Est; + } + + return SDValue(); +} + +/// Return true if base is a frame index, which is known not to alias with +/// anything but itself. Provides base object and offset as results. +static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, + const GlobalValue *&GV, const void *&CV) { + // Assume it is a primitive operation. + Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; + + // If it's an adding a simple constant then integrate the offset. + if (Base.getOpcode() == ISD::ADD) { + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { + Base = Base.getOperand(0); + Offset += C->getZExtValue(); + } + } + + // Return the underlying GlobalValue, and update the Offset. Return false + // for GlobalAddressSDNode since the same GlobalAddress may be represented + // by multiple nodes with different offsets. 
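+  // (Returning false here only means the base is not known to alias solely
+  // with itself; the caller still compares the GV pointers and offsets
+  // directly.)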
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { + GV = G->getGlobal(); + Offset += G->getOffset(); + return false; + } + + // Return the underlying Constant value, and update the Offset. Return false + // for ConstantSDNodes since the same constant pool entry may be represented + // by multiple nodes with different offsets. + if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { + CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() + : (const void *)C->getConstVal(); + Offset += C->getOffset(); + return false; + } + // If it's any of the following then it can't alias with anything but itself. + return isa<FrameIndexSDNode>(Base); +} + +/// Return true if there is any possibility that the two addresses overlap. +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { + // If they are the same then they must be aliases. + if (Op0->getBasePtr() == Op1->getBasePtr()) return true; + + // If they are both volatile then they cannot be reordered. + if (Op0->isVolatile() && Op1->isVolatile()) return true; + + // If one operation reads from invariant memory, and the other may store, they + // cannot alias. These should really be checking the equivalent of mayWrite, + // but it only matters for memory nodes other than load /store. + if (Op0->isInvariant() && Op1->writeMem()) + return false; + + if (Op1->isInvariant() && Op0->writeMem()) + return false; + + // Gather base node and offset information. + SDValue Base1, Base2; + int64_t Offset1, Offset2; + const GlobalValue *GV1, *GV2; + const void *CV1, *CV2; + bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), + Base1, Offset1, GV1, CV1); + bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), + Base2, Offset2, GV2, CV2); + + // If they have a same base address then check to see if they overlap. + if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + + // It is possible for different frame indices to alias each other, mostly + // when tail call optimization reuses return address slots for arguments. + // To catch this case, look up the actual index of frame indices to compute + // the real alias relationship. + if (isFrameIndex1 && isFrameIndex2) { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); + Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); + return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || + (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); + } + + // Otherwise, if we know what the bases are, and they aren't identical, then + // we know they cannot alias. + if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) + return false; + + // If we know required SrcValue1 and SrcValue2 have relatively large alignment + // compared to the size and offset of the access, we may be able to prove they + // do not alias. This check is conservative for now to catch cases created by + // splitting vector types. 
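+  // For example (sizes illustrative), two 8-byte accesses with 16-byte
+  // alignment whose offsets are 0 and 8 modulo that alignment cannot overlap,
+  // which is what the OffAlign comparison below establishes.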
+ if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && + (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && + (Op0->getMemoryVT().getSizeInBits() >> 3 == + Op1->getMemoryVT().getSizeInBits() >> 3) && + (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { + int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); + int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); + + // There is no overlap between these relatively aligned accesses of similar + // size, return no alias. + if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || + (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) + return false; + } + + bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 + ? CombinerGlobalAA + : DAG.getSubtarget().useAA(); +#ifndef NDEBUG + if (CombinerAAOnlyFunc.getNumOccurrences() && + CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) + UseAA = false; +#endif + if (UseAA && + Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { + // Use alias analysis information. + int64_t MinOffset = std::min(Op0->getSrcValueOffset(), + Op1->getSrcValueOffset()); + int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + + Op0->getSrcValueOffset() - MinOffset; + int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + + Op1->getSrcValueOffset() - MinOffset; + AliasResult AAResult = + AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1, + UseTBAA ? Op0->getAAInfo() : AAMDNodes()), + MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2, + UseTBAA ? Op1->getAAInfo() : AAMDNodes())); + if (AAResult == NoAlias) + return false; + } + + // Otherwise we have to assume they alias. + return true; +} + +/// Walk up chain skipping non-aliasing memory nodes, +/// looking for aliasing nodes and adding them to the Aliases vector. +void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, + SmallVectorImpl<SDValue> &Aliases) { + SmallVector<SDValue, 8> Chains; // List of chains to visit. + SmallPtrSet<SDNode *, 16> Visited; // Visited node set. + + // Get alias information for node. + bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); + + // Starting off. + Chains.push_back(OriginalChain); + unsigned Depth = 0; + + // Look at each chain and determine if it is an alias. If so, add it to the + // aliases list. If not, then continue up the chain looking for the next + // candidate. + while (!Chains.empty()) { + SDValue Chain = Chains.pop_back_val(); + + // For TokenFactor nodes, look at each operand and only continue up the + // chain until we reach the depth limit. + // + // FIXME: The depth check could be made to return the last non-aliasing + // chain we found before we hit a tokenfactor rather than the original + // chain. + if (Depth > TLI.getGatherAllAliasesMaxDepth()) { + Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // Don't bother if we've been before. + if (!Visited.insert(Chain.getNode()).second) + continue; + + switch (Chain.getOpcode()) { + case ISD::EntryToken: + // Entry token is ideal chain operand, but handled in FindBetterChain. + break; + + case ISD::LOAD: + case ISD::STORE: { + // Get alias information for Chain. + bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) && + !cast<LSBaseSDNode>(Chain.getNode())->isVolatile(); + + // If chain is alias then stop here. 
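+      // Two non-volatile loads can be reordered freely, so only pairs that
+      // involve a store (or a volatile load) are handed to isAlias().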
+ if (!(IsLoad && IsOpLoad) && + isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) { + Aliases.push_back(Chain); + } else { + // Look further up the chain. + Chains.push_back(Chain.getOperand(0)); + ++Depth; + } + break; + } + + case ISD::TokenFactor: + // We have to check each of the operands of the token factor for "small" + // token factors, so we queue them up. Adding the operands to the queue + // (stack) in reverse order maintains the original order and increases the + // likelihood that getNode will find a matching token factor (CSE.) + if (Chain.getNumOperands() > 16) { + Aliases.push_back(Chain); + break; + } + for (unsigned n = Chain.getNumOperands(); n;) + Chains.push_back(Chain.getOperand(--n)); + ++Depth; + break; + + default: + // For all other instructions we will just have to take what we can get. + Aliases.push_back(Chain); + break; + } + } + + // We need to be careful here to also search for aliases through the + // value operand of a store, etc. Consider the following situation: + // Token1 = ... + // L1 = load Token1, %52 + // S1 = store Token1, L1, %51 + // L2 = load Token1, %52+8 + // S2 = store Token1, L2, %51+8 + // Token2 = Token(S1, S2) + // L3 = load Token2, %53 + // S3 = store Token2, L3, %52 + // L4 = load Token2, %53+8 + // S4 = store Token2, L4, %52+8 + // If we search for aliases of S3 (which loads address %52), and we look + // only through the chain, then we'll miss the trivial dependence on L1 + // (which also loads from %52). We then might change all loads and + // stores to use Token1 as their chain operand, which could result in + // copying %53 into %52 before copying %52 into %51 (which should + // happen first). + // + // The problem is, however, that searching for such data dependencies + // can become expensive, and the cost is not directly related to the + // chain depth. Instead, we'll rule out such configurations here by + // insisting that we've visited all chain users (except for users + // of the original chain, which is not necessary). When doing this, + // we need to look through nodes we don't care about (otherwise, things + // like register copies will interfere with trivial cases). + + SmallVector<const SDNode *, 16> Worklist; + for (const SDNode *N : Visited) + if (N != OriginalChain.getNode()) + Worklist.push_back(N); + + while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + + // We have already visited M, and want to make sure we've visited any uses + // of M that we care about. For uses that we've not visisted, and don't + // care about, queue them to the worklist. + + for (SDNode::use_iterator UI = M->use_begin(), + UIE = M->use_end(); UI != UIE; ++UI) + if (UI.getUse().getValueType() == MVT::Other && + Visited.insert(*UI).second) { + if (isa<MemSDNode>(*UI)) { + // We've not visited this use, and we care about it (it could have an + // ordering dependency with the original node). + Aliases.clear(); + Aliases.push_back(OriginalChain); + return; + } + + // We've not visited this use, but we don't care about it. Mark it as + // visited and enqueue it to the worklist. + Worklist.push_back(*UI); + } + } +} + +/// Walk up chain skipping non-aliasing memory nodes, looking for a better chain +/// (aliasing node.) +SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) { + SmallVector<SDValue, 8> Aliases; // Ops for replacing token factor. + + // Accumulate all the aliases to this node. + GatherAllAliases(N, OldChain, Aliases); + + // If no operands then chain to entry token. 
+ if (Aliases.size() == 0) + return DAG.getEntryNode(); + + // If a single operand then chain to it. We don't need to revisit it. + if (Aliases.size() == 1) + return Aliases[0]; + + // Construct a custom tailored token factor. + return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases); +} + +bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) { + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); + + // We must have a base and an offset. + if (!BasePtr.Base.getNode()) + return false; + + // Do not handle stores to undef base pointers. + if (BasePtr.Base.getOpcode() == ISD::UNDEF) + return false; + + SmallVector<StoreSDNode *, 8> ChainedStores; + ChainedStores.push_back(St); + + // Walk up the chain and look for nodes with offsets from the same + // base pointer. Stop when reaching an instruction with a different kind + // or instruction which has a different base pointer. + StoreSDNode *Index = St; + while (Index) { + // If the chain has more than one use, then we can't reorder the mem ops. + if (Index != St && !SDValue(Index, 0)->hasOneUse()) + break; + + if (Index->isVolatile() || Index->isIndexed()) + break; + + // Find the base pointer and offset for this memory node. + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); + + // Check that the base pointer is the same as the original one. + if (!Ptr.equalBaseIndex(BasePtr)) + break; + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (true) { + if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) { + // We found a store node. Use it for the next iteration. + ChainedStores.push_back(STn); + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) { + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = nullptr; + break; + } + } + } + + bool MadeChange = false; + SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains; + + for (StoreSDNode *ChainedStore : ChainedStores) { + SDValue Chain = ChainedStore->getChain(); + SDValue BetterChain = FindBetterChain(ChainedStore, Chain); + + if (Chain != BetterChain) { + MadeChange = true; + BetterChains.push_back(std::make_pair(ChainedStore, BetterChain)); + } + } + + // Do all replacements after finding the replacements to make to avoid making + // the chains more complicated by introducing new TokenFactors. + for (auto Replacement : BetterChains) + replaceStoreChain(Replacement.first, Replacement.second); + + return MadeChange; +} + +/// This is the entry point for the file. +void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA, + CodeGenOpt::Level OptLevel) { + /// This is the main entry point to this class. 
+ DAGCombiner(*this, AA, OptLevel).Run(Level); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp new file mode 100644 index 0000000..cfbb209 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -0,0 +1,2220 @@ +//===-- FastISel.cpp - Implementation of the FastISel class ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the implementation of the FastISel class. +// +// "Fast" instruction selection is designed to emit very poor code quickly. +// Also, it is not designed to be able to do much lowering, so most illegal +// types (e.g. i64 on 32-bit targets) and operations are not supported. It is +// also not intended to be able to do much optimization, except in a few cases +// where doing optimizations reduces overall compile time. For example, folding +// constants into immediate fields is often done, because it's cheap and it +// reduces the number of instructions later phases have to examine. +// +// "Fast" instruction selection is able to fail gracefully and transfer +// control to the SelectionDAG selector for operations that it doesn't +// support. In many cases, this allows us to avoid duplicating a lot of +// the complicated lowering logic that SelectionDAG currently has. +// +// The intended use for "fast" instruction selection is "-O0" mode +// compilation, where the quality of the generated code is irrelevant when +// weighed against the speed at which the code can be generated. Also, +// at -O0, the LLVM optimizers are not running, and this makes the +// compile time of codegen a much higher portion of the overall compile +// time. Despite its limitations, "fast" instruction selection is able to +// handle enough code on its own to provide noticeable overall speedups +// in -O0 compiles. +// +// Basic operations are supported in a target-independent way, by reading +// the same instruction descriptions that the SelectionDAG selector reads, +// and identifying simple arithmetic operations that can be directly selected +// from simple operators. More complicated operations currently require +// target-specific code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Analysis.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "isel" + +STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " + "target-independent selector"); +STATISTIC(NumFastIselSuccessTarget, "Number of insts selected by " + "target-specific selector"); +STATISTIC(NumFastIselDead, "Number of dead insts removed on failure"); + +void FastISel::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + IsSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + IsZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + IsInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + IsSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + IsNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + IsByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + IsInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + IsReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + +/// Set the current block to which generated machine instructions will be +/// appended, and clear the local CSE map. +void FastISel::startNewBlock() { + LocalValueMap.clear(); + + // Instructions are appended to FuncInfo.MBB. If the basic block already + // contains labels or copies, use the last instruction as the last local + // value. + EmitStartPt = nullptr; + if (!FuncInfo.MBB->empty()) + EmitStartPt = &FuncInfo.MBB->back(); + LastLocalValue = EmitStartPt; +} + +bool FastISel::lowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!fastLowerArguments()) + return false; + + // Enter arguments into ValueMap for uses in non-entry BBs. + for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); + I != E; ++I) { + DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(&*I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[&*I] = VI->second; + } + return true; +} + +void FastISel::flushLocalValueMap() { + LocalValueMap.clear(); + LastLocalValue = EmitStartPt; + recomputeInsertPt(); + SavedInsertPt = FuncInfo.InsertPt; +} + +bool FastISel::hasTrivialKill(const Value *V) { + // Don't consider constants or arguments to have trivial kills. 
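+  // A value has a trivial kill when its single use is in the same block as
+  // its definition, so that use can safely be flagged as killing the register.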
+ const Instruction *I = dyn_cast<Instruction>(V); + if (!I) + return false; + + // No-op casts are trivially coalesced by fast-isel. + if (const auto *Cast = dyn_cast<CastInst>(I)) + if (Cast->isNoopCast(DL.getIntPtrType(Cast->getContext())) && + !hasTrivialKill(Cast->getOperand(0))) + return false; + + // Even the value might have only one use in the LLVM IR, it is possible that + // FastISel might fold the use into another instruction and now there is more + // than one use at the Machine Instruction level. + unsigned Reg = lookUpRegForValue(V); + if (Reg && !MRI.use_empty(Reg)) + return false; + + // GEPs with all zero indices are trivially coalesced by fast-isel. + if (const auto *GEP = dyn_cast<GetElementPtrInst>(I)) + if (GEP->hasAllZeroIndices() && !hasTrivialKill(GEP->getOperand(0))) + return false; + + // Only instructions with a single use in the same basic block are considered + // to have trivial kills. + return I->hasOneUse() && + !(I->getOpcode() == Instruction::BitCast || + I->getOpcode() == Instruction::PtrToInt || + I->getOpcode() == Instruction::IntToPtr) && + cast<Instruction>(*I->user_begin())->getParent() == I->getParent(); +} + +unsigned FastISel::getRegForValue(const Value *V) { + EVT RealVT = TLI.getValueType(DL, V->getType(), /*AllowUnknown=*/true); + // Don't handle non-simple values in FastISel. + if (!RealVT.isSimple()) + return 0; + + // Ignore illegal types. We must do this before looking up the value + // in ValueMap because Arguments are given virtual registers regardless + // of whether FastISel can handle them. + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT)) { + // Handle integer promotions, though, because they're common and easy. + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) + VT = TLI.getTypeToTransformTo(V->getContext(), VT).getSimpleVT(); + else + return 0; + } + + // Look up the value to see if we already have a register for it. + unsigned Reg = lookUpRegForValue(V); + if (Reg) + return Reg; + + // In bottom-up mode, just create the virtual register which will be used + // to hold the value. It will be materialized later. + if (isa<Instruction>(V) && + (!isa<AllocaInst>(V) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) + return FuncInfo.InitializeRegForValue(V); + + SavePoint SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); + + leaveLocalValueArea(SaveInsertPt); + + return Reg; +} + +unsigned FastISel::materializeConstant(const Value *V, MVT VT) { + unsigned Reg = 0; + if (const auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getValue().getActiveBits() <= 64) + Reg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); + } else if (isa<AllocaInst>(V)) + Reg = fastMaterializeAlloca(cast<AllocaInst>(V)); + else if (isa<ConstantPointerNull>(V)) + // Translate this as an integer zero so that it can be + // local-CSE'd with actual integer zeros. + Reg = getRegForValue( + Constant::getNullValue(DL.getIntPtrType(V->getContext()))); + else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + if (CF->isNullValue()) + Reg = fastMaterializeFloatZero(CF); + else + // Try to emit the constant directly. + Reg = fastEmit_f(VT, VT, ISD::ConstantFP, CF); + + if (!Reg) { + // Try to emit the constant by using an integer constant with a cast. 
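+      // That is, convert the FP immediate to a pointer-width integer and, if
+      // the conversion is exact, materialize that integer and convert it back
+      // with SINT_TO_FP.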
+ const APFloat &Flt = CF->getValueAPF(); + EVT IntVT = TLI.getPointerTy(DL); + + uint64_t x[2]; + uint32_t IntBitWidth = IntVT.getSizeInBits(); + bool isExact; + (void)Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, + APFloat::rmTowardZero, &isExact); + if (isExact) { + APInt IntVal(IntBitWidth, x); + + unsigned IntegerReg = + getRegForValue(ConstantInt::get(V->getContext(), IntVal)); + if (IntegerReg != 0) + Reg = fastEmit_r(IntVT.getSimpleVT(), VT, ISD::SINT_TO_FP, IntegerReg, + /*Kill=*/false); + } + } + } else if (const auto *Op = dyn_cast<Operator>(V)) { + if (!selectOperator(Op, Op->getOpcode())) + if (!isa<Instruction>(Op) || + !fastSelectInstruction(cast<Instruction>(Op))) + return 0; + Reg = lookUpRegForValue(Op); + } else if (isa<UndefValue>(V)) { + Reg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), Reg); + } + return Reg; +} + +/// Helper for getRegForValue. This function is called when the value isn't +/// already available in a register and must be materialized with new +/// instructions. +unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { + unsigned Reg = 0; + // Give the target-specific code a try first. + if (isa<Constant>(V)) + Reg = fastMaterializeConstant(cast<Constant>(V)); + + // If target-specific code couldn't or didn't want to handle the value, then + // give target-independent code a try. + if (!Reg) + Reg = materializeConstant(V, VT); + + // Don't cache constant materializations in the general ValueMap. + // To do so would require tracking what uses they dominate. + if (Reg) { + LocalValueMap[V] = Reg; + LastLocalValue = MRI.getVRegDef(Reg); + } + return Reg; +} + +unsigned FastISel::lookUpRegForValue(const Value *V) { + // Look up the value to see if we already have a register for it. We + // cache values defined by Instructions across blocks, and other values + // only locally. This is because Instructions already have the SSA + // def-dominates-use requirement enforced. + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; + return LocalValueMap[V]; +} + +void FastISel::updateValueMap(const Value *I, unsigned Reg, unsigned NumRegs) { + if (!isa<Instruction>(I)) { + LocalValueMap[I] = Reg; + return; + } + + unsigned &AssignedReg = FuncInfo.ValueMap[I]; + if (AssignedReg == 0) + // Use the new register. + AssignedReg = Reg; + else if (Reg != AssignedReg) { + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + for (unsigned i = 0; i < NumRegs; i++) + FuncInfo.RegFixups[AssignedReg + i] = Reg + i; + + AssignedReg = Reg; + } +} + +std::pair<unsigned, bool> FastISel::getRegForGEPIndex(const Value *Idx) { + unsigned IdxN = getRegForValue(Idx); + if (IdxN == 0) + // Unhandled operand. Halt "fast" selection and bail. + return std::pair<unsigned, bool>(0, false); + + bool IdxNIsKill = hasTrivialKill(Idx); + + // If the index is smaller or larger than intptr_t, truncate or extend it. 
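+ // For example, an i32 index on a target with 64-bit pointers is
+ // sign-extended to the pointer width before being added.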
+ MVT PtrVT = TLI.getPointerTy(DL); + EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); + if (IdxVT.bitsLT(PtrVT)) { + IdxN = fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::SIGN_EXTEND, IdxN, + IdxNIsKill); + IdxNIsKill = true; + } else if (IdxVT.bitsGT(PtrVT)) { + IdxN = + fastEmit_r(IdxVT.getSimpleVT(), PtrVT, ISD::TRUNCATE, IdxN, IdxNIsKill); + IdxNIsKill = true; + } + return std::pair<unsigned, bool>(IdxN, IdxNIsKill); +} + +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + FuncInfo.MBB = FuncInfo.InsertPt->getParent(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +void FastISel::removeDeadCode(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E) { + assert(I && E && std::distance(I, E) > 0 && "Invalid iterator!"); + while (I != E) { + MachineInstr *Dead = &*I; + ++I; + Dead->eraseFromParent(); + ++NumFastIselDead; + } + recomputeInsertPt(); +} + +FastISel::SavePoint FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + DebugLoc OldDL = DbgLoc; + recomputeInsertPt(); + DbgLoc = DebugLoc(); + SavePoint SP = {OldInsertPt, OldDL}; + return SP; +} + +void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = std::prev(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt.InsertPt; + DbgLoc = OldInsertPt.DL; +} + +bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) { + EVT VT = EVT::getEVT(I->getType(), /*HandleUnknown=*/true); + if (VT == MVT::Other || !VT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // We only handle legal types. For example, on x86-32 the instruction + // selector contains all of the 64-bit instructions from x86-64, + // under the assumption that i64 won't be used if the target doesn't + // support it. + if (!TLI.isTypeLegal(VT)) { + // MVT::i1 is special. Allow AND, OR, or XOR because they + // don't require additional zeroing, which makes them easy. + if (VT == MVT::i1 && (ISDOpcode == ISD::AND || ISDOpcode == ISD::OR || + ISDOpcode == ISD::XOR)) + VT = TLI.getTypeToTransformTo(I->getContext(), VT); + else + return false; + } + + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(0))) + if (isa<Instruction>(I) && cast<Instruction>(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (!Op1) + return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = + fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, Op1IsKill, + CI->getZExtValue(), VT.getSimpleVT()); + if (!ResultReg) + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // Check if the second operand is a constant and handle it appropriately. 
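+ // For example, for "add i32 %x, 42" we pass Imm = 42 to fastEmit_ri_
+ // instead of materializing 42 into a register first.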
+ if (const auto *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { + uint64_t Imm = CI->getSExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa<BinaryOperator>(I) && + cast<BinaryOperator>(I)->isExact() && isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; + } + + // Transform "urem x, pow2" -> "and x, pow2-1". + if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) && + isPowerOf2_64(Imm)) { + --Imm; + ISDOpcode = ISD::AND; + } + + unsigned ResultReg = fastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (!ResultReg) + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; + } + + // Check if the second operand is a constant float. + if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) { + unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, CF); + if (ResultReg) { + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; + } + } + + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (!Op1) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + // Now we have both operands in registers. Emit the instruction. + unsigned ResultReg = fastEmit_rr(VT.getSimpleVT(), VT.getSimpleVT(), + ISDOpcode, Op0, Op0IsKill, Op1, Op1IsKill); + if (!ResultReg) + // Target-specific code wasn't able to find a machine opcode for + // the given ISD opcode and type. Halt "fast" selection and bail. + return false; + + // We successfully emitted code for the given LLVM Instruction. + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectGetElementPtr(const User *I) { + unsigned N = getRegForValue(I->getOperand(0)); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool NIsKill = hasTrivialKill(I->getOperand(0)); + + // Keep a running tab of the total offset to coalesce multiple N = N + Offset + // into a single N = N + TotalOffset. + uint64_t TotalOffs = 0; + // FIXME: What's a good SWAG number for MaxOffs? + uint64_t MaxOffs = 2048; + Type *Ty = I->getOperand(0)->getType(); + MVT VT = TLI.getPointerTy(DL); + for (GetElementPtrInst::const_op_iterator OI = I->op_begin() + 1, + E = I->op_end(); + OI != E; ++OI) { + const Value *Idx = *OI; + if (auto *StTy = dyn_cast<StructType>(Ty)) { + uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue(); + if (Field) { + // N = N + Offset + TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); + if (TotalOffs >= MaxOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. + return false; + NIsKill = true; + TotalOffs = 0; + } + } + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + + // If this is a constant subscript, handle it quickly. + if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { + if (CI->isZero()) + continue; + // N = N + Offset + uint64_t IdxN = CI->getValue().sextOrTrunc(64).getSExtValue(); + TotalOffs += DL.getTypeAllocSize(Ty) * IdxN; + if (TotalOffs >= MaxOffs) { + N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT); + if (!N) // Unhandled operand. Halt "fast" selection and bail. 
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+ continue;
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ NIsKill = true;
+ TotalOffs = 0;
+ }
+
+ // N = N + Idx * ElementSize;
+ uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+ std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
+ unsigned IdxN = Pair.first;
+ bool IdxNIsKill = Pair.second;
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+
+ if (ElementSize != 1) {
+ IdxN = fastEmit_ri_(VT, ISD::MUL, IdxN, IdxNIsKill, ElementSize, VT);
+ if (!IdxN) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ IdxNIsKill = true;
+ }
+ N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+ }
+ if (TotalOffs) {
+ N = fastEmit_ri_(VT, ISD::ADD, N, NIsKill, TotalOffs, VT);
+ if (!N) // Unhandled operand. Halt "fast" selection and bail.
+ return false;
+ }
+
+ // We successfully emitted code for the given LLVM Instruction.
+ updateValueMap(I, N);
+ return true;
+}
+
+bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
+ const CallInst *CI, unsigned StartIdx) {
+ for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) {
+ Value *Val = CI->getArgOperand(i);
+ // Check for constants and encode them with a StackMaps::ConstantOp prefix.
+ if (const auto *C = dyn_cast<ConstantInt>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(C->getSExtValue()));
+ } else if (isa<ConstantPointerNull>(Val)) {
+ Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
+ Ops.push_back(MachineOperand::CreateImm(0));
+ } else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
+ // Values coming from a stack location also require a special encoding,
+ // but that is added later on by the target-specific frame index
+ // elimination implementation.
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end())
+ Ops.push_back(MachineOperand::CreateFI(SI->second));
+ else
+ return false;
+ } else {
+ unsigned Reg = getRegForValue(Val);
+ if (!Reg)
+ return false;
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ }
+ }
+ return true;
+}
+
+bool FastISel::selectStackmap(const CallInst *I) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+ assert(I->getCalledFunction()->getReturnType()->isVoidTy() &&
+ "Stackmap cannot return a value.");
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPS (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target-specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // CALLSEQ_START(0...)
+ // STACKMAP(id, nbytes, ...)
+ // CALLSEQ_END(0, 0)
+ //
+ SmallVector<MachineOperand, 32> Ops;
+
+ // Add the <id> and <numBytes> constants.
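+ // For example, "call void @llvm.experimental.stackmap(i64 12, i32 8, i32 %x)"
+ // yields a STACKMAP with operands 12, 8, and then the encoding of %x (a
+ // vreg, a frame index, or a ConstantOp/immediate pair).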
+ assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + auto Builder = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)); + const MCInstrDesc &MCID = Builder.getInstr()->getDesc(); + for (unsigned I = 0, E = MCID.getNumOperands(); I < E; ++I) + Builder.addImm(0); + + // Issue STACKMAP. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.addOperand(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0) + .addImm(0); + + // Inform the Frame Information that we have a stackmap in this function. + FuncInfo.MF->getFrameInfo()->setHasStackMap(); + + return true; +} + +/// \brief Lower an argument list according to the target calling convention. +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx, + unsigned NumArgs, const Value *Callee, + bool ForceRetVoidTy, CallLoweringInfo &CLI) { + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + ImmutableCallSite CS(CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *RetTy = ForceRetVoidTy ? 
Type::getVoidTy(CI->getType()->getContext()) + : CI->getType(); + CLI.setCallee(CI->getCallingConv(), RetTy, Callee, std::move(Args), NumArgs); + + return lowerCallTo(CLI); +} + +FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee( + const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy, + const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) { + SmallString<32> MangledName; + Mangler::getNameWithPrefix(MangledName, Target, DL); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName); + return setCallee(CC, ResultTy, Sym, std::move(ArgsList), FixedArgs); +} + +bool FastISel::selectPatchpoint(const CallInst *I) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, + // i32 <numBytes>, + // i8* <target>, + // i32 <numArgs>, + // [Args...], + // [live variables...]) + CallingConv::ID CC = I->getCallingConv(); + bool IsAnyRegCC = CC == CallingConv::AnyReg; + bool HasDef = !I->getType()->isVoidTy(); + Value *Callee = I->getOperand(PatchPointOpers::TargetPos)->stripPointerCasts(); + + // Get the real number of arguments participating in the call <numArgs> + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)) && + "Expected a constant integer."); + const auto *NumArgsVal = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NArgPos)); + unsigned NumArgs = NumArgsVal->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // This includes all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(I->getNumArgOperands() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + CallLoweringInfo CLI; + CLI.setIsPatchPoint(); + if (!lowerCallOperands(I, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, CLI)) + return false; + + assert(CLI.Call && "No call instruction specified."); + + SmallVector<MachineOperand, 32> Ops; + + // Add an explicit result reg if we use the anyreg calling convention. + if (IsAnyRegCC && HasDef) { + assert(CLI.NumResultRegs == 0 && "Unexpected result register."); + CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64)); + CLI.NumResultRegs = 1; + Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true)); + } + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Add the call target. 
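+ // At this point Ops holds the optional anyreg result def, <id>, and
+ // <numBytes>; the target, <numArgs>, the calling convention, the call
+ // arguments, the live variables, and the register mask are appended below.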
+ if (const auto *C = dyn_cast<IntToPtrInst>(Callee)) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else if (const auto *C = dyn_cast<ConstantExpr>(Callee)) { + if (C->getOpcode() == Instruction::IntToPtr) { + uint64_t CalleeConstAddr = + cast<ConstantInt>(C->getOperand(0))->getZExtValue(); + Ops.push_back(MachineOperand::CreateImm(CalleeConstAddr)); + } else + llvm_unreachable("Unsupported ConstantExpr."); + } else if (const auto *GV = dyn_cast<GlobalValue>(Callee)) { + Ops.push_back(MachineOperand::CreateGA(GV, 0)); + } else if (isa<ConstantPointerNull>(Callee)) + Ops.push_back(MachineOperand::CreateImm(0)); + else + llvm_unreachable("Unsupported callee address."); + + // Adjust <numArgs> to account for any arguments that have been passed on + // the stack instead. + unsigned NumCallRegArgs = IsAnyRegCC ? NumArgs : CLI.OutRegs.size(); + Ops.push_back(MachineOperand::CreateImm(NumCallRegArgs)); + + // Add the calling convention + Ops.push_back(MachineOperand::CreateImm((unsigned)CC)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) { + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) { + unsigned Reg = getRegForValue(I->getArgOperand(i)); + if (!Reg) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + // Push the arguments from the call instruction. + for (auto Reg : CLI.OutRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + + // Push live variables for the stack map. + if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs)) + return false; + + // Push the register mask info. + Ops.push_back(MachineOperand::CreateRegMask( + TRI.getCallPreservedMask(*FuncInfo.MF, CC))); + + // Add scratch registers as implicit def and early clobber. + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Add implicit defs (return values). + for (auto Reg : CLI.InRegs) + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true, + /*IsImpl=*/true)); + + // Insert the patchpoint instruction before the call generated by the target. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc, + TII.get(TargetOpcode::PATCHPOINT)); + + for (auto &MO : Ops) + MIB.addOperand(MO); + + MIB->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + // Delete the original call instruction. + CLI.Call->eraseFromParent(); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); + + if (CLI.NumResultRegs) + updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs); + return true; +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. 
+static AttributeSet getReturnAttrs(FastISel::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + +bool FastISel::lowerCallTo(const CallInst *CI, const char *SymName, + unsigned NumArgs) { + MCContext &Ctx = MF->getContext(); + SmallString<32> MangledName; + Mangler::getNameWithPrefix(MangledName, SymName, DL); + MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName); + return lowerCallTo(CI, Sym, NumArgs); +} + +bool FastISel::lowerCallTo(const CallInst *CI, MCSymbol *Symbol, + unsigned NumArgs) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. + for (unsigned ArgI = 0; ArgI != NumArgs; ++ArgI) { + Value *V = CI->getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + ArgListEntry Entry; + Entry.Val = V; + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, ArgI + 1); + Args.push_back(Entry); + } + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FTy, Symbol, std::move(Args), CS, NumArgs); + + return lowerCallTo(CLI); +} + +bool FastISel::lowerCallTo(CallLoweringInfo &CLI) { + // Handle the incoming return values from the call. + CLI.clearIns(); + SmallVector<EVT, 4> RetTys; + ComputeValueVTs(TLI, DL, CLI.RetTy, RetTys); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, TLI, DL); + + bool CanLowerReturn = TLI.CanLowerReturn( + CLI.CallConv, *FuncInfo.MF, CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + // FIXME: sret demotion isn't supported yet - bail out. + if (!CanLowerReturn) + return false; + + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = TLI.getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + + // Handle all of the outgoing arguments. + CLI.clearOuts(); + for (auto &Arg : CLI.getArgs()) { + Type *FinalType = Arg.Ty; + if (Arg.IsByVal) + FinalType = cast<PointerType>(Arg.Ty)->getElementType(); + bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + + ISD::ArgFlagsTy Flags; + if (Arg.IsZExt) + Flags.setZExt(); + if (Arg.IsSExt) + Flags.setSExt(); + if (Arg.IsInReg) + Flags.setInReg(); + if (Arg.IsSRet) + Flags.setSRet(); + if (Arg.IsByVal) + Flags.setByVal(); + if (Arg.IsInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. 
If we port + // inalloca to more targets, we'll have to add custom inalloca handling in + // the various CC lowering callbacks. + Flags.setByVal(); + } + if (Arg.IsByVal || Arg.IsInAlloca) { + PointerType *Ty = cast<PointerType>(Arg.Ty); + Type *ElementTy = Ty->getElementType(); + unsigned FrameSize = DL.getTypeAllocSize(ElementTy); + // For ByVal, alignment should come from FE. BE will guess if this info is + // not there, but there are cases it cannot get right. + unsigned FrameAlign = Arg.Alignment; + if (!FrameAlign) + FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL); + Flags.setByValSize(FrameSize); + Flags.setByValAlign(FrameAlign); + } + if (Arg.IsNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty); + Flags.setOrigAlign(OriginalAlignment); + + CLI.OutVals.push_back(Arg.Val); + CLI.OutFlags.push_back(Flags); + } + + if (!fastLowerCall(CLI)) + return false; + + // Set all unused physreg defs as dead. + assert(CLI.Call && "No call instruction specified."); + CLI.Call->setPhysRegsDeadExcept(CLI.InRegs, TRI); + + if (CLI.NumResultRegs && CLI.CS) + updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs); + + return true; +} + +bool FastISel::lowerCall(const CallInst *CI) { + ImmutableCallSite CS(CI); + + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FuncTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FuncTy->getReturnType(); + + ArgListTy Args; + ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + Entry.Val = V; + Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within fastLowerCall. + bool IsTailCall = CI->isTailCall(); + if (IsTailCall && !isInTailCallPosition(CS, TM)) + IsTailCall = false; + + CallLoweringInfo CLI; + CLI.setCallee(RetTy, FuncTy, CI->getCalledValue(), std::move(Args), CS) + .setTailCall(IsTailCall); + + return lowerCallTo(CLI); +} + +bool FastISel::selectCall(const User *I) { + const CallInst *Call = cast<CallInst>(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast<InlineAsm>(Call->getCalledValue())) { + // If the inline asm has side effects, then make sure that no local value + // lives across by flushing the local value map. + if (IA->hasSideEffects()) + flushLocalValueMap(); + + // Don't attempt to handle constraints. + if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + MachineModuleInfo &MMI = FuncInfo.MF->getMMI(); + ComputeUsesVAFloatArgument(*Call, &MMI); + + // Handle intrinsic function calls. 
+ if (const auto *II = dyn_cast<IntrinsicInst>(Call)) + return selectIntrinsicCall(II); + + // Usually, it does not make sense to initialize a value, + // make an unrelated function call and use the value, because + // it tends to be spilled on the stack. So, we move the pointer + // to the last local value to the beginning of the block, so that + // all the values which have already been materialized, + // appear after the call. It also makes sense to skip intrinsics + // since they tend to be inlined. + flushLocalValueMap(); + + return lowerCall(Call); +} + +bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { + switch (II->getIntrinsicID()) { + default: + break; + // At -O0 we don't care about the lifetime intrinsics. + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // The donothing intrinsic does, well, nothing. + case Intrinsic::donothing: + return true; + case Intrinsic::dbg_declare: { + const DbgDeclareInst *DI = cast<DbgDeclareInst>(II); + assert(DI->getVariable() && "Missing variable"); + if (!FuncInfo.MF->getMMI().hasDebugInfo()) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + const Value *Address = DI->getAddress(); + if (!Address || isa<UndefValue>(Address)) { + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + return true; + } + + unsigned Offset = 0; + Optional<MachineOperand> Op; + if (const auto *Arg = dyn_cast<Argument>(Address)) + // Some arguments' frame index is recorded during argument lowering. + Offset = FuncInfo.getArgumentFrameIndex(Arg); + if (Offset) + Op = MachineOperand::CreateFI(Offset); + if (!Op) + if (unsigned Reg = lookUpRegForValue(Address)) + Op = MachineOperand::CreateReg(Reg, false); + + // If we have a VLA that has a "use" in a metadata node that's then used + // here but it has no other uses, then we have a problem. E.g., + // + // int foo (const int *x) { + // char a[*x]; + // return 0; + // } + // + // If we assign 'a' a vreg and fast isel later on has to use the selection + // DAG isel, it will want to copy the value to the vreg. However, there are + // no uses, which goes counter to what selection DAG isel expects. + if (!Op && !Address->use_empty() && isa<Instruction>(Address) && + (!isa<AllocaInst>(Address) || + !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address)))) + Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address), + false); + + if (Op) { + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); + if (Op->isReg()) { + Op->setIsDebug(true); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0, + DI->getVariable(), DI->getExpression()); + } else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(0) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } + return true; + } + case Intrinsic::dbg_value: { + // This form of DBG_VALUE is target-independent. 
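+ // Its operands are the value (a register, immediate, or FP immediate),
+ // the offset, the variable metadata, and the expression metadata.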
+ const DbgValueInst *DI = cast<DbgValueInst>(II); + const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); + const Value *V = DI->getValue(); + assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) && + "Expected inlined-at fields to agree"); + if (!V) { + // Currently the optimizer can produce this; insert an undef to + // help debugging. Probably the optimizer should not do this. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(0U) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addCImm(CI) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addImm(CI->getZExtValue()) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (const auto *CF = dyn_cast<ConstantFP>(V)) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addFPImm(CF) + .addImm(DI->getOffset()) + .addMetadata(DI->getVariable()) + .addMetadata(DI->getExpression()); + } else if (unsigned Reg = lookUpRegForValue(V)) { + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = DI->getOffset() != 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, IsIndirect, Reg, + DI->getOffset(), DI->getVariable(), DI->getExpression()); + } else { + // We can't yet handle anything else here because it would require + // generating code, thus altering codegen because of debug info. + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + } + return true; + } + case Intrinsic::objectsize: { + ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1)); + unsigned long long Res = CI->isZero() ? -1ULL : 0; + Constant *ResCI = ConstantInt::get(II->getType(), Res); + unsigned ResultReg = getRegForValue(ResCI); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(II->getArgOperand(0)); + if (!ResultReg) + return false; + updateValueMap(II, ResultReg); + return true; + } + case Intrinsic::experimental_stackmap: + return selectStackmap(II); + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + return selectPatchpoint(II); + } + + return fastLowerIntrinsicCall(II); +} + +bool FastISel::selectCast(const User *I, unsigned Opcode) { + EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstVT = TLI.getValueType(DL, I->getType()); + + if (SrcVT == MVT::Other || !SrcVT.isSimple() || DstVT == MVT::Other || + !DstVT.isSimple()) + // Unhandled type. Halt "fast" selection and bail. + return false; + + // Check if the destination type is legal. + if (!TLI.isTypeLegal(DstVT)) + return false; + + // Check if the source operand is legal. + if (!TLI.isTypeLegal(SrcVT)) + return false; + + unsigned InputReg = getRegForValue(I->getOperand(0)); + if (!InputReg) + // Unhandled operand. Halt "fast" selection and bail. 
+ return false; + + bool InputRegIsKill = hasTrivialKill(I->getOperand(0)); + + unsigned ResultReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), + Opcode, InputReg, InputRegIsKill); + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectBitCast(const User *I) { + // If the bitcast doesn't change the type, just use the operand value. + if (I->getType() == I->getOperand(0)->getType()) { + unsigned Reg = getRegForValue(I->getOperand(0)); + if (!Reg) + return false; + updateValueMap(I, Reg); + return true; + } + + // Bitcasts of other values become reg-reg copies or BITCAST operators. + EVT SrcEVT = TLI.getValueType(DL, I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(DL, I->getType()); + if (SrcEVT == MVT::Other || DstEVT == MVT::Other || + !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) + // Unhandled type. Halt "fast" selection and bail. + return false; + + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DstVT = DstEVT.getSimpleVT(); + unsigned Op0 = getRegForValue(I->getOperand(0)); + if (!Op0) // Unhandled operand. Halt "fast" selection and bail. + return false; + bool Op0IsKill = hasTrivialKill(I->getOperand(0)); + + // First, try to perform the bitcast by inserting a reg-reg copy. + unsigned ResultReg = 0; + if (SrcVT == DstVT) { + const TargetRegisterClass *SrcClass = TLI.getRegClassFor(SrcVT); + const TargetRegisterClass *DstClass = TLI.getRegClassFor(DstVT); + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(Op0); + } + } + + // If the reg-reg copy failed, select a BITCAST opcode. + if (!ResultReg) + ResultReg = fastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); + + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +// Remove local value instructions starting from the instruction after +// SavedLastLocalValue to the current function insert point. +void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue) +{ + MachineInstr *CurLastLocalValue = getLastLocalValue(); + if (CurLastLocalValue != SavedLastLocalValue) { + // Find the first local value instruction to be deleted. + // This is the instruction after SavedLastLocalValue if it is non-NULL. + // Otherwise it's the first instruction in the block. + MachineBasicBlock::iterator FirstDeadInst(SavedLastLocalValue); + if (SavedLastLocalValue) + ++FirstDeadInst; + else + FirstDeadInst = FuncInfo.MBB->getFirstNonPHI(); + setLastLocalValue(SavedLastLocalValue); + removeDeadCode(FirstDeadInst, FuncInfo.InsertPt); + } +} + +bool FastISel::selectInstruction(const Instruction *I) { + MachineInstr *SavedLastLocalValue = getLastLocalValue(); + // Just before the terminator instruction, insert instructions to + // feed PHI nodes in successor blocks. + if (isa<TerminatorInst>(I)) + if (!handlePHINodesInSuccessorBlocks(I->getParent())) { + // PHI node handling may have generated local value instructions, + // even though it failed to handle all PHI nodes. + // We remove these instructions because SelectionDAGISel will generate + // them again. 
+ removeDeadLocalValueCode(SavedLastLocalValue); + return false; + } + + DbgLoc = I->getDebugLoc(); + + SavedInsertPt = FuncInfo.InsertPt; + + if (const auto *Call = dyn_cast<CallInst>(I)) { + const Function *F = Call->getCalledFunction(); + LibFunc::Func Func; + + // As a special case, don't handle calls to builtin library functions that + // may be translated directly to target instructions. + if (F && !F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) + return false; + + // Don't handle Intrinsic::trap if a trap function is specified. + if (F && F->getIntrinsicID() == Intrinsic::trap && + Call->hasFnAttr("trap-func-name")) + return false; + } + + // First, try doing target-independent selection. + if (!SkipTargetIndependentISel) { + if (selectOperator(I, I->getOpcode())) { + ++NumFastIselSuccessIndependent; + DbgLoc = DebugLoc(); + return true; + } + // Remove dead code. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + SavedInsertPt = FuncInfo.InsertPt; + } + // Next, try calling the target to attempt to handle the instruction. + if (fastSelectInstruction(I)) { + ++NumFastIselSuccessTarget; + DbgLoc = DebugLoc(); + return true; + } + // Remove dead code. + recomputeInsertPt(); + if (SavedInsertPt != FuncInfo.InsertPt) + removeDeadCode(FuncInfo.InsertPt, SavedInsertPt); + + DbgLoc = DebugLoc(); + // Undo phi node updates, because they will be added again by SelectionDAG. + if (isa<TerminatorInst>(I)) { + // PHI node handling may have generated local value instructions. + // We remove them because SelectionDAGISel will generate them again. + removeDeadLocalValueCode(SavedLastLocalValue); + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + } + return false; +} + +/// Emit an unconditional branch to the given block, unless it is the immediate +/// (fall-through) successor, and update the CFG. +void FastISel::fastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + // For more accurate line information if this is the only instruction + // in the block then emit it, otherwise we have the unconditional + // fall-through case, which needs no instructions. + } else { + // The unconditional branch case. + TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, + SmallVector<MachineOperand, 0>(), DbgLoc); + } + if (FuncInfo.BPI) { + auto BranchProbability = FuncInfo.BPI->getEdgeProbability( + FuncInfo.MBB->getBasicBlock(), MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(MSucc); +} + +void FastISel::finishCondBranch(const BasicBlock *BranchBB, + MachineBasicBlock *TrueMBB, + MachineBasicBlock *FalseMBB) { + // Add TrueMBB as successor unless it is equal to the FalseMBB: This can + // happen in degenerate IR and MachineIR forbids to have a block twice in the + // successor/predecessor lists. + if (TrueMBB != FalseMBB) { + if (FuncInfo.BPI) { + auto BranchProbability = + FuncInfo.BPI->getEdgeProbability(BranchBB, TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchProbability); + } else + FuncInfo.MBB->addSuccessorWithoutProb(TrueMBB); + } + + fastEmitBranch(FalseMBB, DbgLoc); +} + +/// Emit an FNeg operation. 
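+/// If the target has no FNEG instruction, the sign bit is flipped instead:
+/// e.g. an f64 is bitcast to i64, xor'd with 1 << 63, and bitcast back.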
+bool FastISel::selectFNeg(const User *I) { + unsigned OpReg = getRegForValue(BinaryOperator::getFNegArgument(I)); + if (!OpReg) + return false; + bool OpRegIsKill = hasTrivialKill(I); + + // If the target has ISD::FNEG, use it. + EVT VT = TLI.getValueType(DL, I->getType()); + unsigned ResultReg = fastEmit_r(VT.getSimpleVT(), VT.getSimpleVT(), ISD::FNEG, + OpReg, OpRegIsKill); + if (ResultReg) { + updateValueMap(I, ResultReg); + return true; + } + + // Bitcast the value to integer, twiddle the sign bit with xor, + // and then bitcast it back to floating-point. + if (VT.getSizeInBits() > 64) + return false; + EVT IntVT = EVT::getIntegerVT(I->getContext(), VT.getSizeInBits()); + if (!TLI.isTypeLegal(IntVT)) + return false; + + unsigned IntReg = fastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(), + ISD::BITCAST, OpReg, OpRegIsKill); + if (!IntReg) + return false; + + unsigned IntResultReg = fastEmit_ri_( + IntVT.getSimpleVT(), ISD::XOR, IntReg, /*IsKill=*/true, + UINT64_C(1) << (VT.getSizeInBits() - 1), IntVT.getSimpleVT()); + if (!IntResultReg) + return false; + + ResultReg = fastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(), ISD::BITCAST, + IntResultReg, /*IsKill=*/true); + if (!ResultReg) + return false; + + updateValueMap(I, ResultReg); + return true; +} + +bool FastISel::selectExtractValue(const User *U) { + const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(U); + if (!EVI) + return false; + + // Make sure we only try to handle extracts with a legal result. But also + // allow i1 because it's easy. + EVT RealVT = TLI.getValueType(DL, EVI->getType(), /*AllowUnknown=*/true); + if (!RealVT.isSimple()) + return false; + MVT VT = RealVT.getSimpleVT(); + if (!TLI.isTypeLegal(VT) && VT != MVT::i1) + return false; + + const Value *Op0 = EVI->getOperand(0); + Type *AggTy = Op0->getType(); + + // Get the base result register. + unsigned ResultReg; + DenseMap<const Value *, unsigned>::iterator I = FuncInfo.ValueMap.find(Op0); + if (I != FuncInfo.ValueMap.end()) + ResultReg = I->second; + else if (isa<Instruction>(Op0)) + ResultReg = FuncInfo.InitializeRegForValue(Op0); + else + return false; // fast-isel can't handle aggregate constants at the moment + + // Get the actual result register, which is an offset from the base register. + unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices()); + + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, DL, AggTy, AggValueVTs); + + for (unsigned i = 0; i < VTIndex; i++) + ResultReg += TLI.getNumRegisters(FuncInfo.Fn->getContext(), AggValueVTs[i]); + + updateValueMap(EVI, ResultReg); + return true; +} + +bool FastISel::selectOperator(const User *I, unsigned Opcode) { + switch (Opcode) { + case Instruction::Add: + return selectBinaryOp(I, ISD::ADD); + case Instruction::FAdd: + return selectBinaryOp(I, ISD::FADD); + case Instruction::Sub: + return selectBinaryOp(I, ISD::SUB); + case Instruction::FSub: + // FNeg is currently represented in LLVM IR as a special case of FSub. 
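+ // That is, "fsub -0.0, %x"; detect that pattern and lower it via selectFNeg.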
+ if (BinaryOperator::isFNeg(I))
+ return selectFNeg(I);
+ return selectBinaryOp(I, ISD::FSUB);
+ case Instruction::Mul:
+ return selectBinaryOp(I, ISD::MUL);
+ case Instruction::FMul:
+ return selectBinaryOp(I, ISD::FMUL);
+ case Instruction::SDiv:
+ return selectBinaryOp(I, ISD::SDIV);
+ case Instruction::UDiv:
+ return selectBinaryOp(I, ISD::UDIV);
+ case Instruction::FDiv:
+ return selectBinaryOp(I, ISD::FDIV);
+ case Instruction::SRem:
+ return selectBinaryOp(I, ISD::SREM);
+ case Instruction::URem:
+ return selectBinaryOp(I, ISD::UREM);
+ case Instruction::FRem:
+ return selectBinaryOp(I, ISD::FREM);
+ case Instruction::Shl:
+ return selectBinaryOp(I, ISD::SHL);
+ case Instruction::LShr:
+ return selectBinaryOp(I, ISD::SRL);
+ case Instruction::AShr:
+ return selectBinaryOp(I, ISD::SRA);
+ case Instruction::And:
+ return selectBinaryOp(I, ISD::AND);
+ case Instruction::Or:
+ return selectBinaryOp(I, ISD::OR);
+ case Instruction::Xor:
+ return selectBinaryOp(I, ISD::XOR);
+
+ case Instruction::GetElementPtr:
+ return selectGetElementPtr(I);
+
+ case Instruction::Br: {
+ const BranchInst *BI = cast<BranchInst>(I);
+
+ if (BI->isUnconditional()) {
+ const BasicBlock *LLVMSucc = BI->getSuccessor(0);
+ MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc];
+ fastEmitBranch(MSucc, BI->getDebugLoc());
+ return true;
+ }
+
+ // Conditional branches are not handled yet.
+ // Halt "fast" selection and bail.
+ return false;
+ }
+
+ case Instruction::Unreachable:
+ if (TM.Options.TrapUnreachable)
+ return fastEmit_(MVT::Other, MVT::Other, ISD::TRAP) != 0;
+ else
+ return true;
+
+ case Instruction::Alloca:
+ // FunctionLowering has the static-sized case covered.
+ if (FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(I)))
+ return true;
+
+ // Dynamic-sized alloca is not handled yet.
+ return false;
+
+ case Instruction::Call:
+ return selectCall(I);
+
+ case Instruction::BitCast:
+ return selectBitCast(I);
+
+ case Instruction::FPToSI:
+ return selectCast(I, ISD::FP_TO_SINT);
+ case Instruction::ZExt:
+ return selectCast(I, ISD::ZERO_EXTEND);
+ case Instruction::SExt:
+ return selectCast(I, ISD::SIGN_EXTEND);
+ case Instruction::Trunc:
+ return selectCast(I, ISD::TRUNCATE);
+ case Instruction::SIToFP:
+ return selectCast(I, ISD::SINT_TO_FP);
+
+ case Instruction::IntToPtr: // Deliberate fall-through.
+ case Instruction::PtrToInt: {
+ EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType());
+ EVT DstVT = TLI.getValueType(DL, I->getType());
+ if (DstVT.bitsGT(SrcVT))
+ return selectCast(I, ISD::ZERO_EXTEND);
+ if (DstVT.bitsLT(SrcVT))
+ return selectCast(I, ISD::TRUNCATE);
+ unsigned Reg = getRegForValue(I->getOperand(0));
+ if (!Reg)
+ return false;
+ updateValueMap(I, Reg);
+ return true;
+ }
+
+ case Instruction::ExtractValue:
+ return selectExtractValue(I);
+
+ case Instruction::PHI:
+ llvm_unreachable("FastISel shouldn't visit PHI nodes!");
+
+ default:
+ // Unhandled instruction. Halt "fast" selection and bail.
+ return false;
+ }
+}
+
+FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo,
+ bool SkipTargetIndependentISel)
+ : FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
+ MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
+ TII(*MF->getSubtarget().getInstrInfo()),
+ TLI(*MF->getSubtarget().getTargetLowering()),
+ TRI(*MF->getSubtarget().getRegisterInfo()), LibInfo(LibInfo),
+ SkipTargetIndependentISel(SkipTargetIndependentISel) {}
+
+FastISel::~FastISel() {}
+
+bool FastISel::fastLowerArguments() { return false; }
+
+bool FastISel::fastLowerCall(CallLoweringInfo & /*CLI*/) { return false; }
+
+bool FastISel::fastLowerIntrinsicCall(const IntrinsicInst * /*II*/) {
+ return false;
+}
+
+unsigned FastISel::fastEmit_(MVT, MVT, unsigned) { return 0; }
+
+unsigned FastISel::fastEmit_r(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rr(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_i(MVT, MVT, unsigned, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_f(MVT, MVT, unsigned,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/,
+ const ConstantFP * /*FPImm*/) {
+ return 0;
+}
+
+unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
+ bool /*Op0IsKill*/, unsigned /*Op1*/,
+ bool /*Op1IsKill*/, uint64_t /*Imm*/) {
+ return 0;
+}
+
+/// This method is a wrapper around fastEmit_ri. It first tries to emit an
+/// instruction with an immediate operand using fastEmit_ri.
+/// If that fails, it materializes the immediate into a register and tries
+/// fastEmit_rr instead.
+unsigned FastISel::fastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0,
+ bool Op0IsKill, uint64_t Imm, MVT ImmType) {
+ // If this is a multiply by a power of two, emit this as a shift left.
+ if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) {
+ Opcode = ISD::SHL;
+ Imm = Log2_64(Imm);
+ } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) {
+ // div x, 8 -> srl x, 3
+ Opcode = ISD::SRL;
+ Imm = Log2_64(Imm);
+ }
+
+ // Horrible hack (to be removed), check to make sure shift amounts are
+ // in-range.
+ if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) &&
+ Imm >= VT.getSizeInBits())
+ return 0;
+
+ // First check if immediate type is legal. If not, we can't use the ri form.
+ unsigned ResultReg = fastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm);
+ if (ResultReg)
+ return ResultReg;
+ unsigned MaterialReg = fastEmit_i(ImmType, ImmType, ISD::Constant, Imm);
+ bool IsImmKill = true;
+ if (!MaterialReg) {
+ // This is a bit ugly/slow, but failing here means falling out of
+ // fast-isel, which would be very slow.
+ IntegerType *ITy =
+ IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits());
+ MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm));
+ if (!MaterialReg)
+ return 0;
+ // FIXME: If the materialized register here has no uses yet then this
+ // will be the first use and we should be able to mark it as killed.
+ // However, the local value area for materialising constant expressions + // grows down, not up, which means that any constant expressions we generate + // later which also use 'Imm' could be after this instruction and therefore + // after this kill. + IsImmKill = false; + } + return fastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, MaterialReg, IsImmKill); +} + +unsigned FastISel::createResultReg(const TargetRegisterClass *RC) { + return MRI.createVirtualRegister(RC); +} + +unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op, + unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op); + return NewOp; + } + } + return Op; +} + +unsigned FastISel::fastEmitInst_(unsigned MachineInstOpcode, + const TargetRegisterClass *RC) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg); + return ResultReg; +} + +unsigned FastISel::fastEmitInst_r(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, unsigned Op2, + bool Op2IsKill) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + 
.addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addReg(Op2, getKillRegState(Op2IsKill)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_ri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, uint64_t Imm1, + uint64_t Imm2) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_f(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + const ConstantFP *FPImm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addFPImm(FPImm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addFPImm(FPImm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_rri(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, unsigned Op0, + bool Op0IsKill, unsigned Op1, + bool Op1IsKill, uint64_t Imm) { + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + unsigned ResultReg = createResultReg(RC); + Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs()); + Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(Op0, getKillRegState(Op0IsKill)) + .addReg(Op1, getKillRegState(Op1IsKill)) + .addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, 
uint64_t Imm) { + unsigned ResultReg = createResultReg(RC); + const MCInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) + .addImm(Imm); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + +unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, + bool Op0IsKill, uint32_t Idx) { + unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + const TargetRegisterClass *RC = MRI.getRegClass(Op0); + MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0, getKillRegState(Op0IsKill), Idx); + return ResultReg; +} + +/// Emit MachineInstrs to compute the value of Op with all but the least +/// significant bit set to zero. +unsigned FastISel::fastEmitZExtFromI1(MVT VT, unsigned Op0, bool Op0IsKill) { + return fastEmit_ri(VT, VT, ISD::AND, Op0, Op0IsKill, 1); +} + +/// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. +/// Emit code to ensure constants are copied into registers when needed. +/// Remember the virtual registers that need to be added to the Machine PHI +/// nodes as input. We cannot just directly add them, because expansion +/// might result in multiple MBB's for one BB. As such, the start of the +/// BB might correspond to a different MBB than the end. +bool FastISel::handlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + FuncInfo.OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); + + // Check successor nodes' PHI nodes that expect a constant to be available + // from this block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) + continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB).second) + continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::const_iterator I = SuccBB->begin(); + const auto *PN = dyn_cast<PHINode>(I); ++I) { + + // Ignore dead phi's. + if (PN->use_empty()) + continue; + + // Only handle legal types. Two interesting things to note here. First, + // by bailing out early, we may leave behind some dead instructions, + // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its + // own moves. Second, this check is necessary because FastISel doesn't + // use CreateRegs to create registers, so it always creates + // exactly one register for each non-void instruction. + EVT VT = TLI.getValueType(DL, PN->getType(), /*AllowUnknown=*/true); + if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { + // Handle integer promotions, though, because they're common and easy. 
+ if (!(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + return false; + } + } + + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + // Set the DebugLoc for the copy. Prefer the location of the operand + // if there is one; use the location of the PHI otherwise. + DbgLoc = PN->getDebugLoc(); + if (const auto *Inst = dyn_cast<Instruction>(PHIOp)) + DbgLoc = Inst->getDebugLoc(); + + unsigned Reg = getRegForValue(PHIOp); + if (!Reg) { + FuncInfo.PHINodesToUpdate.resize(FuncInfo.OrigNumPHINodesToUpdate); + return false; + } + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + DbgLoc = DebugLoc(); + } + } + + return true; +} + +bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) { + assert(LI->hasOneUse() && + "tryToFoldLoad expected a LoadInst with a single use"); + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->user_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->user_back(); + } + + // If we didn't find the fold instruction, then we failed to collapse the + // sequence. + if (TheUser != FoldInst) + return false; + + // Don't try to fold volatile loads. Target has to deal with alignment + // constraints. + if (LI->isVolatile()) + return false; + + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. + unsigned LoadReg = getRegForValue(LI); + if (!LoadReg) + return false; + + // We can't fold if this vreg has no uses or more than one use. Multiple uses + // may mean that the instruction got lowered to multiple MIs, or the use of + // the loaded value ended up being multiple operands of the result. + if (!MRI.hasOneUse(LoadReg)) + return false; + + MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LoadReg); + MachineInstr *User = RI->getParent(); + + // Set the insertion point properly. Folding the load can cause generation of + // other random instructions (like sign extends) for addressing modes; make + // sure they get inserted in a logical place before the new instruction. + FuncInfo.InsertPt = User; + FuncInfo.MBB = User->getParent(); + + // Ask the target to try folding the load. + return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI); +} + +bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { + // Must be an add. + if (!isa<AddOperator>(Add)) + return false; + // Type size needs to match. + if (DL.getTypeSizeInBits(GEP->getType()) != + DL.getTypeSizeInBits(Add->getType())) + return false; + // Must be in the same basic block. + if (isa<Instruction>(Add) && + FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB) + return false; + // Must have a constant operand. 
+ return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); +} + +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else + return nullptr; + + bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr; + bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + AAMDNodes AAInfo; + I->getAAMetadata(AAInfo); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. + Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = DL.getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, AAInfo, Ranges); +} + +CmpInst::Predicate FastISel::optimizeCmpPredicate(const CmpInst *CI) const { + // If both operands are the same, then try to optimize or fold the cmp. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; +} diff 
--git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp new file mode 100644 index 0000000..b62bd2b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -0,0 +1,579 @@ +//===-- FunctionLoweringInfo.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating functions from LLVM IR into +// Machine IR. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "function-lowering-info" + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { + if (I->use_empty()) return false; + if (isa<PHINode>(I)) return true; + const BasicBlock *BB = I->getParent(); + for (const User *U : I->users()) + if (cast<Instruction>(U)->getParent() != BB || isa<PHINode>(U)) + return true; + + return false; +} + +static ISD::NodeType getPreferredExtendForValue(const Value *V) { + // For the users of the source value being used for compare instruction, if + // the number of signed predicate is greater than unsigned predicate, we + // prefer to use SIGN_EXTEND. + // + // With this optimization, we would be able to reduce some redundant sign or + // zero extension instruction, and eventually more machine CSE opportunities + // can be exposed. 
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + unsigned NumOfSigned = 0, NumOfUnsigned = 0; + for (const User *U : V->users()) { + if (const auto *CI = dyn_cast<CmpInst>(U)) { + NumOfSigned += CI->isSigned(); + NumOfUnsigned += CI->isUnsigned(); + } + } + if (NumOfSigned > NumOfUnsigned) + ExtendKind = ISD::SIGN_EXTEND; + + return ExtendKind; +} + +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, + SelectionDAG *DAG) { + Fn = &fn; + MF = &mf; + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + MachineModuleInfo &MMI = MF->getMMI(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + + // Check whether the function can return without sret-demotion. + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI, + mf.getDataLayout()); + CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF, + Fn->isVarArg(), Outs, Fn->getContext()); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::const_iterator BB = Fn->begin(), EB = Fn->end(); + for (; BB != EB; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) { + Type *Ty = AI->getAllocatedType(); + unsigned Align = + std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty), + AI->getAlignment()); + unsigned StackAlign = TFI->getStackAlignment(); + + // Static allocas can be folded into the initial stack frame + // adjustment. For targets that don't realign the stack, don't + // do this if there is an extra alignment requirement. + if (AI->isStaticAlloca() && + (TFI->isStackRealignable() || (Align <= StackAlign))) { + const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize()); + uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI); + } else { + // FIXME: Overaligned static allocas should be grouped into + // a single dynamic allocation instead of using a separate + // stack allocation for each one. + if (Align <= StackAlign) + Align = 0; + // Inform the Frame Information that we have variable-sized objects. + MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI); + } + } + + // Look for inline asm that clobbers the SP register. + if (isa<CallInst>(I) || isa<InvokeInst>(I)) { + ImmutableCallSite CS(&*I); + if (isa<InlineAsm>(CS.getCalledValue())) { + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + std::vector<TargetLowering::AsmOperandInfo> Ops = + TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS); + for (size_t I = 0, E = Ops.size(); I != E; ++I) { + TargetLowering::AsmOperandInfo &Op = Ops[I]; + if (Op.Type == InlineAsm::isClobber) { + // Clobbers don't have SDValue operands, hence SDValue(). + TLI->ComputeConstraintToUse(Op, SDValue(), DAG); + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode, + Op.ConstraintVT); + if (PhysReg.first == SP) + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + } + } + } + + // Look for calls to the @llvm.va_start intrinsic. 
We can omit some + // prologue boilerplate for variadic functions that don't examine their + // arguments. + if (const auto *II = dyn_cast<IntrinsicInst>(I)) { + if (II->getIntrinsicID() == Intrinsic::vastart) + MF->getFrameInfo()->setHasVAStart(true); + } + + // If we have a musttail call in a variadic function, we need to ensure we + // forward implicit register parameters. + if (const auto *CI = dyn_cast<CallInst>(I)) { + if (CI->isMustTailCall() && Fn->isVarArg()) + MF->getFrameInfo()->setHasMustTailInVarArgFunc(true); + } + + // Mark values used outside their block as exported, by allocating + // a virtual register for them. + if (isUsedOutsideOfDefiningBlock(&*I)) + if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I))) + InitializeRegForValue(&*I); + + // Collect llvm.dbg.declare information. This is done now instead of + // during the initial isel pass through the IR so that it is done + // in a predictable order. + if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) { + assert(DI->getVariable() && "Missing variable"); + assert(DI->getDebugLoc() && "Missing location"); + if (MMI.hasDebugInfo()) { + // Don't handle byval struct arguments or VLAs, for example. + // Non-byval arguments are handled here (they refer to the stack + // temporary alloca at this point). + const Value *Address = DI->getAddress(); + if (Address) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + DenseMap<const AllocaInst *, int>::iterator SI = + StaticAllocaMap.find(AI); + if (SI != StaticAllocaMap.end()) { // Check for VLAs. + int FI = SI->second; + MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(), + FI, DI->getDebugLoc()); + } + } + } + } + } + + // Decide the preferred extend type for a value. + PreferredExtendType[&*I] = getPreferredExtendForValue(&*I); + } + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn->begin(); BB != EB; ++BB) { + // Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks + // are really data, and no instructions can live here. + if (BB->isEHPad()) { + const Instruction *I = BB->getFirstNonPHI(); + // If this is a non-landingpad EH pad, mark this function as using + // funclets. + // FIXME: SEH catchpads do not create funclets, so we could avoid setting + // this in such cases in order to improve frame layout. + if (!isa<LandingPadInst>(I)) { + MMI.setHasEHFunclets(true); + MF->getFrameInfo()->setHasOpaqueSPAdjustment(true); + } + if (isa<CatchSwitchInst>(I)) { + assert(&*BB->begin() == I && + "WinEHPrepare failed to remove PHIs from imaginary BBs"); + continue; + } + if (isa<FuncletPadInst>(I)) + assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs"); + } + + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB); + MBBMap[&*BB] = MBB; + MF->push_back(MBB); + + // Transfer the address-taken flag. This is necessary because there could + // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only + // the first one should be marked. + if (BB->hasAddressTaken()) + MBB->setHasAddressTaken(); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. 
+ for (BasicBlock::const_iterator I = BB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + if (PN->use_empty()) continue; + + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + + DebugLoc DL = PN->getDebugLoc(); + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } + + // Mark landing pad blocks. + SmallVector<const LandingPadInst *, 4> LPads; + for (BB = Fn->begin(); BB != EB; ++BB) { + const Instruction *FNP = BB->getFirstNonPHI(); + if (BB->isEHPad() && MBBMap.count(&*BB)) + MBBMap[&*BB]->setIsEHPad(); + if (const auto *LPI = dyn_cast<LandingPadInst>(FNP)) + LPads.push_back(LPI); + } + + // If this personality uses funclets, we need to do a bit more work. + if (!Fn->hasPersonalityFn()) + return; + EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn()); + if (!isFuncletEHPersonality(Personality)) + return; + + // Calculate state numbers if we haven't already. + WinEHFuncInfo &EHInfo = *MF->getWinEHFuncInfo(); + if (Personality == EHPersonality::MSVC_CXX) + calculateWinCXXEHStateNumbers(&fn, EHInfo); + else if (isAsynchronousEHPersonality(Personality)) + calculateSEHStateNumbers(&fn, EHInfo); + else if (Personality == EHPersonality::CoreCLR) + calculateClrEHStateNumbers(&fn, EHInfo); + + calculateCatchReturnSuccessorColors(&fn, EHInfo); + + // Map all BB references in the WinEH data to MBBs. + for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { + for (WinEHHandlerType &H : TBME.HandlerArray) { + if (H.CatchObj.Alloca) { + assert(StaticAllocaMap.count(H.CatchObj.Alloca)); + H.CatchObj.FrameIndex = StaticAllocaMap[H.CatchObj.Alloca]; + } else { + H.CatchObj.FrameIndex = INT_MAX; + } + if (H.Handler) + H.Handler = MBBMap[H.Handler.get<const BasicBlock *>()]; + } + } + for (CxxUnwindMapEntry &UME : EHInfo.CxxUnwindMap) + if (UME.Cleanup) + UME.Cleanup = MBBMap[UME.Cleanup.get<const BasicBlock *>()]; + for (SEHUnwindMapEntry &UME : EHInfo.SEHUnwindMap) { + const BasicBlock *BB = UME.Handler.get<const BasicBlock *>(); + UME.Handler = MBBMap[BB]; + } + for (ClrEHUnwindMapEntry &CME : EHInfo.ClrEHUnwindMap) { + const BasicBlock *BB = CME.Handler.get<const BasicBlock *>(); + CME.Handler = MBBMap[BB]; + } +} + +/// clear - Clear out all the function-specific state. This returns this +/// FunctionLoweringInfo to an empty state, ready to be used for a +/// different function. +void FunctionLoweringInfo::clear() { + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); + LiveOutRegInfo.clear(); + VisitedBBs.clear(); + ArgDbgValues.clear(); + ByValArgFrameIndexMap.clear(); + RegFixups.clear(); + StatepointStackSlots.clear(); + StatepointRelocatedValues.clear(); + PreferredExtendType.clear(); +} + +/// CreateReg - Allocate a single virtual register for the given type. 
+unsigned FunctionLoweringInfo::CreateReg(MVT VT) { + return RegInfo->createVirtualRegister( + MF->getSubtarget().getTargetLowering()->getRegClassFor(VT)); +} + +/// CreateRegs - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. +/// +unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { + const TargetLowering *TLI = MF->getSubtarget().getTargetLowering(); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT); + + unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = CreateReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the +/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If +/// the register's LiveOutInfo is for a smaller bit width, it is extended to +/// the larger bit width by zero extension. The bit width must be no smaller +/// than the LiveOutInfo's existing bit width. +const FunctionLoweringInfo::LiveOutInfo * +FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { + if (!LiveOutRegInfo.inBounds(Reg)) + return nullptr; + + LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return nullptr; + + if (BitWidth > LOI->KnownZero.getBitWidth()) { + LOI->NumSignBits = 1; + LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); + LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + } + + return LOI; +} + +/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination +/// register based on the LiveOutInfo of its operands. 
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { + Type *Ty = PN->getType(); + if (!Ty->isIntegerTy() || Ty->isVectorTy()) + return; + + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, MF->getDataLayout(), Ty, ValueVTs); + assert(ValueVTs.size() == 1 && + "PHIs with non-vector integer types should have a single VT."); + EVT IntVT = ValueVTs[0]; + + if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1) + return; + IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT); + unsigned BitWidth = IntVT.getSizeInBits(); + + unsigned DestReg = ValueMap[PN]; + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + return; + LiveOutRegInfo.grow(DestReg); + LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; + + Value *V = PN->getIncomingValue(0); + if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = Val.getNumSignBits(); + DestLOI.KnownZero = ~Val; + DestLOI.KnownOne = Val; + } else { + assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" + "CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI = *SrcLOI; + } + + assert(DestLOI.KnownZero.getBitWidth() == BitWidth && + DestLOI.KnownOne.getBitWidth() == BitWidth && + "Masks should have the same bit width as the type."); + + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (isa<UndefValue>(V) || isa<ConstantExpr>(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); + DestLOI.KnownZero &= ~Val; + DestLOI.KnownOne &= Val; + continue; + } + + assert(ValueMap.count(V) && "V should have been placed in ValueMap when " + "its CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); + DestLOI.KnownZero &= SrcLOI->KnownZero; + DestLOI.KnownOne &= SrcLOI->KnownOne; + } +} + +/// setArgumentFrameIndex - Record frame index for the byval +/// argument. This overrides previous frame index entry for this argument, +/// if any. +void FunctionLoweringInfo::setArgumentFrameIndex(const Argument *A, + int FI) { + ByValArgFrameIndexMap[A] = FI; +} + +/// getArgumentFrameIndex - Get frame index for the byval argument. +/// If the argument does not have any assigned frame index then 0 is +/// returned. 
+int FunctionLoweringInfo::getArgumentFrameIndex(const Argument *A) { + DenseMap<const Argument *, int>::iterator I = + ByValArgFrameIndexMap.find(A); + if (I != ByValArgFrameIndexMap.end()) + return I->second; + DEBUG(dbgs() << "Argument does not have assigned frame index!\n"); + return 0; +} + +unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg( + const Value *CPI, const TargetRegisterClass *RC) { + MachineRegisterInfo &MRI = MF->getRegInfo(); + auto I = CatchPadExceptionPointers.insert({CPI, 0}); + unsigned &VReg = I.first->second; + if (I.second) + VReg = MRI.createVirtualRegister(RC); + assert(VReg && "null vreg in exception pointer table!"); + return VReg; +} + +/// ComputeUsesVAFloatArgument - Determine if any floating-point values are +/// being passed to this variadic function, and set the MachineModuleInfo's +/// usesVAFloatArgument flag if so. This flag is used to emit an undefined +/// reference to _fltused on Windows, which will link in MSVCRT's +/// floating-point support. +void llvm::ComputeUsesVAFloatArgument(const CallInst &I, + MachineModuleInfo *MMI) +{ + FunctionType *FT = cast<FunctionType>( + I.getCalledValue()->getType()->getContainedType(0)); + if (FT->isVarArg() && !MMI->usesVAFloatArgument()) { + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + Type* T = I.getArgOperand(i)->getType(); + for (auto i : post_order(T)) { + if (i->isFloatingPointTy()) { + MMI->setUsesVAFloatArgument(true); + return; + } + } + } + } +} + +/// AddLandingPadInfo - Extract the exception handling information from the +/// landingpad instruction and add them to the specified machine module info. +void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI, + MachineBasicBlock *MBB) { + if (const auto *PF = dyn_cast<Function>( + I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts())) + MMI.addPersonality(PF); + + if (I.isCleanup()) + MMI.addCleanup(MBB); + + // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct, + // but we need to do it this way because of how the DWARF EH emitter + // processes the clauses. + for (unsigned i = I.getNumClauses(); i != 0; --i) { + Value *Val = I.getClause(i - 1); + if (I.isCatch(i - 1)) { + MMI.addCatchTypeInfo(MBB, + dyn_cast<GlobalValue>(Val->stripPointerCasts())); + } else { + // Add filters in a list. + Constant *CVal = cast<Constant>(Val); + SmallVector<const GlobalValue*, 4> FilterList; + for (User::op_iterator + II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II) + FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts())); + + MMI.addFilterTypeInfo(MBB, FilterList); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp new file mode 100644 index 0000000..a1e2d41 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -0,0 +1,1048 @@ +//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. 
+// +//===----------------------------------------------------------------------===// + +#include "InstrEmitter.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "instr-emitter" + +/// MinRCSize - Smallest register class we allow when constraining virtual +/// registers. If satisfying all register class constraints would require +/// using a smaller register class, emit a COPY to a new virtual register +/// instead. +const unsigned MinRCSize = 4; + +/// CountResults - The results of target nodes have register or immediate +/// operands first, then an optional chain, and optional glue operands (which do +/// not go into the resulting MachineInstr). +unsigned InstrEmitter::CountResults(SDNode *Node) { + unsigned N = Node->getNumValues(); + while (N && Node->getValueType(N - 1) == MVT::Glue) + --N; + if (N && Node->getValueType(N - 1) == MVT::Other) + --N; // Skip over chain result. + return N; +} + +/// countOperands - The inputs to target nodes have any actual inputs first, +/// followed by an optional chain operand, then an optional glue operand. +/// Compute the number of actual operands that will go into the resulting +/// MachineInstr. +/// +/// Also count physreg RegisterSDNode and RegisterMaskSDNode operands preceding +/// the chain and glue. These operands may be implicit on the machine instr. +static unsigned countOperands(SDNode *Node, unsigned NumExpUses, + unsigned &NumImpUses) { + unsigned N = Node->getNumOperands(); + while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue) + --N; + if (N && Node->getOperand(N - 1).getValueType() == MVT::Other) + --N; // Ignore chain if it exists. + + // Count RegisterSDNode and RegisterMaskSDNode operands for NumImpUses. + NumImpUses = N - NumExpUses; + for (unsigned I = N; I > NumExpUses; --I) { + if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1))) + continue; + if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1))) + if (TargetRegisterInfo::isPhysicalRegister(RN->getReg())) + continue; + NumImpUses = N - I; + break; + } + + return N; +} + +/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an +/// implicit physical register output. +void InstrEmitter:: +EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, + unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VRBase = 0; + if (TargetRegisterInfo::isVirtualRegister(SrcReg)) { + // Just use the input register directly! + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + return; + } + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. 
+ bool MatchReg = true; + const TargetRegisterClass *UseRC = nullptr; + MVT VT = Node->getSimpleValueType(ResNo); + + // Stick to the preferred register classes for legal types. + if (TLI->isTypeLegal(VT)) + UseRC = TLI->getRegClassFor(VT); + + if (!IsClone && !IsCloned) + for (SDNode *User : Node->uses()) { + bool Match = true; + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + Match = false; + } else if (DestReg != SrcReg) + Match = false; + } else { + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { + SDValue Op = User->getOperand(i); + if (Op.getNode() != Node || Op.getResNo() != ResNo) + continue; + MVT VT = Node->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Other || VT == MVT::Glue) + continue; + Match = false; + if (User->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(User->getMachineOpcode()); + const TargetRegisterClass *RC = nullptr; + if (i+II.getNumDefs() < II.getNumOperands()) { + RC = TRI->getAllocatableClass( + TII->getRegClass(II, i+II.getNumDefs(), TRI, *MF)); + } + if (!UseRC) + UseRC = RC; + else if (RC) { + const TargetRegisterClass *ComRC = + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); + // If multiple uses expect disjoint register classes, we emit + // copies in AddRegisterOperand. + if (ComRC) + UseRC = ComRC; + } + } + } + } + MatchReg &= Match; + if (VRBase) + break; + } + + const TargetRegisterClass *SrcRC = nullptr, *DstRC = nullptr; + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); + + // Figure out the register class to create for the destreg. + if (VRBase) { + DstRC = MRI->getRegClass(VRBase); + } else if (UseRC) { + assert(UseRC->hasType(VT) && "Incompatible phys register def and uses!"); + DstRC = UseRC; + } else { + DstRC = TLI->getRegClassFor(VT); + } + + // If all uses are reading from the src physical register and copying the + // register is either impossible or very expensive, then don't create a copy. + if (MatchReg && SrcRC->getCopyCost() < 0) { + VRBase = SrcReg; + } else { + // Create the reg, emit the copy. + VRBase = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); + } + + SDValue Op(Node, ResNo); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// getDstOfCopyToRegUse - If the only use of the specified result number of +/// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const { + if (!Node->hasOneUse()) + return 0; + + SDNode *User = *Node->use_begin(); + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == ResNo) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return Reg; + } + return 0; +} + +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF && + "IMPLICIT_DEF should have been handled as a special case elsewhere!"); + + unsigned NumResults = CountResults(Node); + for (unsigned i = 0; i < II.getNumDefs(); ++i) { + // If the specific node value is only used by a CopyToReg and the dest reg + // is a vreg in the same register class, use the CopyToReg'd destination + // register instead of creating a new vreg. + unsigned VRBase = 0; + const TargetRegisterClass *RC = + TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // Always let the value type influence the used register class. The + // constraints on the instruction may be too lax to represent the value + // type correctly. For example, a 64-bit float (X86::FR64) can't live in + // the 32-bit float super-class (X86::FR32). + if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) { + const TargetRegisterClass *VTRC = + TLI->getRegClassFor(Node->getSimpleValueType(i)); + if (RC) + VTRC = TRI->getCommonSubClass(RC, VTRC); + if (VTRC) + RC = VTRC; + } + + if (II.OpInfo[i].isOptionalDef()) { + // Optional def must be a physical register. + unsigned NumResults = CountResults(Node); + VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg(); + assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); + MIB.addReg(VRBase, RegState::Define); + } + + if (!VRBase && !IsClone && !IsCloned) + for (SDNode *User : Node->uses()) { + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node && + User->getOperand(2).getResNo() == i) { + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); + if (RegRC == RC) { + VRBase = Reg; + MIB.addReg(VRBase, RegState::Define); + break; + } + } + } + } + + // Create the result registers for this node and add the result regs to + // the machine instruction. + if (VRBase == 0) { + assert(RC && "Isn't a register operand!"); + VRBase = MRI->createVirtualRegister(RC); + MIB.addReg(VRBase, RegState::Define); + } + + // If this def corresponds to a result of the SDNode insert the VRBase into + // the lookup map. + if (i < NumResults) { + SDValue Op(Node, i); + if (IsClone) + VRBaseMap.erase(Op); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + } + } +} + +/// getVR - Return the virtual register corresponding to the specified result +/// of the specified node. +unsigned InstrEmitter::getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Op.isMachineOpcode() && + Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + // Add an IMPLICIT_DEF instruction before every use. 
+ unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo()); + // IMPLICIT_DEF can produce any type of result so its MCInstrDesc + // does not include operand register class info. + if (!VReg) { + const TargetRegisterClass *RC = + TLI->getRegClassFor(Op.getSimpleValueType()); + VReg = MRI->createVirtualRegister(RC); + } + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), VReg); + return VReg; + } + + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + assert(I != VRBaseMap.end() && "Node emitted out of order - late"); + return I->second; +} + + +/// AddRegisterOperand - Add the specified register as an operand to the +/// specified machine instr. Insert register copies if the register is +/// not in the required register class. +void +InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + // Get/emit the operand. + unsigned VReg = getVR(Op, VRBaseMap); + assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); + + const MCInstrDesc &MCID = MIB->getDesc(); + bool isOptDef = IIOpNum < MCID.getNumOperands() && + MCID.OpInfo[IIOpNum].isOptionalDef(); + + // If the instruction requires a register in a different class, create + // a new virtual register and copy the value into it, but first attempt to + // shrink VReg's register class within reason. For example, if VReg == GR32 + // and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP. + if (II) { + const TargetRegisterClass *DstRC = nullptr; + if (IIOpNum < II->getNumOperands()) + DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF)); + if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) { + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); + VReg = NewVReg; + } + } + + // If this value has only one use, that use is a kill. This is a + // conservative approximation. InstrEmitter does trivial coalescing + // with CopyFromReg nodes, so don't emit kill flags for them. + // Avoid kill flags on Schedule cloned nodes, since there will be + // multiple uses. + // Tied operands are never killed, so we need to check that. And that + // means we need to determine the index of the operand. + bool isKill = Op.hasOneUse() && + Op.getNode()->getOpcode() != ISD::CopyFromReg && + !IsDebug && + !(IsClone || IsCloned); + if (isKill) { + unsigned Idx = MIB->getNumOperands(); + while (Idx > 0 && + MIB->getOperand(Idx-1).isReg() && + MIB->getOperand(Idx-1).isImplicit()) + --Idx; + bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; + if (isTied) + isKill = false; + } + + MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | + getDebugRegState(IsDebug)); +} + +/// AddOperand - Add the specified operand to the specified machine instr. II +/// specifies the instruction information for the node, and IIOpNum is the +/// operand number (in the II) that we are adding. 
+void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned) { + if (Op.isMachineOpcode()) { + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + MIB.addImm(C->getSExtValue()); + } else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) { + MIB.addFPImm(F->getConstantFPValue()); + } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) { + // Turn additional physreg operands into implicit uses on non-variadic + // instructions. This is used by call and return instructions passing + // arguments in registers. + bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); + MIB.addReg(R->getReg(), getImplRegState(Imp)); + } else if (RegisterMaskSDNode *RM = dyn_cast<RegisterMaskSDNode>(Op)) { + MIB.addRegMask(RM->getRegMask()); + } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) { + MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags()); + } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) { + MIB.addMBB(BBNode->getBasicBlock()); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) { + MIB.addFrameIndex(FI->getIndex()); + } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) { + MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) { + int Offset = CP->getOffset(); + unsigned Align = CP->getAlignment(); + Type *Type = CP->getType(); + // MachineConstantPool wants an explicit alignment. + if (Align == 0) { + Align = MF->getDataLayout().getPrefTypeAlignment(Type); + if (Align == 0) { + // Alignment of vector types. FIXME! + Align = MF->getDataLayout().getTypeAllocSize(Type); + } + } + + unsigned Idx; + MachineConstantPool *MCP = MF->getConstantPool(); + if (CP->isMachineConstantPoolEntry()) + Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); + else + Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); + MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); + } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) { + MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); + } else if (auto *SymNode = dyn_cast<MCSymbolSDNode>(Op)) { + MIB.addSym(SymNode->getMCSymbol()); + } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op)) { + MIB.addBlockAddress(BA->getBlockAddress(), + BA->getOffset(), + BA->getTargetFlags()); + } else if (TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(Op)) { + MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); + } else { + assert(Op.getValueType() != MVT::Other && + Op.getValueType() != MVT::Glue && + "Chain and glue operands should occur at end of operand list!"); + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, + IsDebug, IsClone, IsCloned); + } +} + +unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + MVT VT, DebugLoc DL) { + const TargetRegisterClass *VRC = MRI->getRegClass(VReg); + const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); + + // RC is a sub-class of VRC that supports SubIdx. Try to constrain VReg + // within reason. + if (RC && RC != VRC) + RC = MRI->constrainRegClass(VReg, RC, MinRCSize); + + // VReg has been adjusted. It can be used with SubIdx operands now. 
+ if (RC) + return VReg; + + // VReg couldn't be reasonably constrained. Emit a COPY to a new virtual + // register instead. + RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx); + assert(RC && "No legal register class for VT supports that SubIdx"); + unsigned NewReg = MRI->createVirtualRegister(RC); + BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg) + .addReg(VReg); + return NewReg; +} + +/// EmitSubregNode - Generate machine code for subreg nodes. +/// +void InstrEmitter::EmitSubregNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned VRBase = 0; + unsigned Opc = Node->getMachineOpcode(); + + // If the node is only used by a CopyToReg and the dest reg is a vreg, use + // the CopyToReg'd destination register instead of creating a new vreg. + for (SDNode *User : Node->uses()) { + if (User->getOpcode() == ISD::CopyToReg && + User->getOperand(2).getNode() == Node) { + unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(DestReg)) { + VRBase = DestReg; + break; + } + } + } + + if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub. There are no + // constraints on the %dst register, COPY can target all legal register + // classes. + unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *TRC = + TLI->getRegClassFor(Node->getSimpleValueType(0)); + + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + MachineInstr *DefMI = MRI->getVRegDef(VReg); + unsigned SrcReg, DstReg, DefSubIdx; + if (DefMI && + TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) && + SubIdx == DefSubIdx && + TRC == MRI->getRegClass(SrcReg)) { + // Optimize these: + // r1025 = s/zext r1024, 4 + // r1026 = extract_subreg r1025, 4 + // to a copy + // r1026 = copy r1024 + VRBase = MRI->createVirtualRegister(TRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg); + MRI->clearKillFlags(SrcReg); + } else { + // VReg may not support a SubIdx sub-register, and we may need to + // constrain its register class or issue a COPY to a compatible register + // class. + VReg = ConstrainForSubReg(VReg, SubIdx, + Node->getOperand(0).getSimpleValueType(), + Node->getDebugLoc()); + + // Create the destreg if it is missing. + if (VRBase == 0) + VRBase = MRI->createVirtualRegister(TRC); + + // Create the extract_subreg machine instruction. + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase).addReg(VReg, 0, SubIdx); + } + } else if (Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + SDValue N2 = Node->getOperand(2); + unsigned SubIdx = cast<ConstantSDNode>(N2)->getZExtValue(); + + // Figure out the register class to create for the destreg. It should be + // the largest legal register class supporting SubIdx sub-registers. + // RegisterCoalescer will constrain it further if it decides to eliminate + // the INSERT_SUBREG instruction. + // + // %dst = INSERT_SUBREG %src, %sub, SubIdx + // + // is lowered by TwoAddressInstructionPass to: + // + // %dst = COPY %src + // %dst:SubIdx = COPY %sub + // + // There is no constraint on the %src register class. 
+ // + const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); + SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); + assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); + + if (VRBase == 0 || !SRC->hasSubClassEq(MRI->getRegClass(VRBase))) + VRBase = MRI->createVirtualRegister(SRC); + + // Create the insert_subreg or subreg_to_reg machine instruction. + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); + + // If creating a subreg_to_reg, then the first input operand + // is an implicit value immediate, otherwise it's a register + if (Opc == TargetOpcode::SUBREG_TO_REG) { + const ConstantSDNode *SD = cast<ConstantSDNode>(N0); + MIB.addImm(SD->getZExtValue()); + } else + AddOperand(MIB, N0, 0, nullptr, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + // Add the subregster being inserted + AddOperand(MIB, N1, 0, nullptr, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + MIB.addImm(SubIdx); + MBB->insert(InsertPos, MIB); + } else + llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. +/// COPY_TO_REGCLASS is just a normal copy, except that the destination +/// register is constrained to be in a particular register class. +/// +void +InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); + + // Create the new VReg in the destination class and emit a copy. + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); + const TargetRegisterClass *DstRC = + TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx)); + unsigned NewVReg = MRI->createVirtualRegister(DstRC); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); + + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. +/// +void InstrEmitter::EmitRegSequence(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); + unsigned NumOps = Node->getNumOperands(); + assert((NumOps & 1) == 1 && + "REG_SEQUENCE must have an odd number of operands!"); + for (unsigned i = 1; i != NumOps; ++i) { + SDValue Op = Node->getOperand(i); + if ((i & 1) == 0) { + RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1)); + // Skip physical registers as they don't have a vreg to get and we'll + // insert copies for them in TwoAddressInstructionPass anyway. 
+ if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) { + unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue(); + unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap); + const TargetRegisterClass *TRC = MRI->getRegClass(SubReg); + const TargetRegisterClass *SRC = + TRI->getMatchingSuperRegClass(RC, TRC, SubIdx); + if (SRC && SRC != RC) { + MRI->setRegClass(NewVReg, SRC); + RC = SRC; + } + } + } + AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + IsClone, IsCloned); + } + + MBB->insert(InsertPos, MIB); + SDValue Op(Node, 0); + bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); +} + +/// EmitDbgValue - Generate machine instruction for a dbg_value node. +/// +MachineInstr * +InstrEmitter::EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap) { + uint64_t Offset = SD->getOffset(); + MDNode *Var = SD->getVariable(); + MDNode *Expr = SD->getExpression(); + DebugLoc DL = SD->getDebugLoc(); + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + + if (SD->getKind() == SDDbgValue::FRAMEIX) { + // Stack address; this needs to be lowered in target-dependent fashion. + // EmitTargetCodeForFrameDebugValue is responsible for allocation. + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()) + .addImm(Offset) + .addMetadata(Var) + .addMetadata(Expr); + } + // Otherwise, we're going to create an instruction here. + const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); + MachineInstrBuilder MIB = BuildMI(*MF, DL, II); + if (SD->getKind() == SDDbgValue::SDNODE) { + SDNode *Node = SD->getSDNode(); + SDValue Op = SDValue(Node, SD->getResNo()); + // It's possible we replaced this SDNode with other(s) and therefore + // didn't generate code for it. It's better to catch these cases where + // they happen and transfer the debug info, but trying to guarantee that + // in all cases would be very fragile; this is a safeguard for any + // that were missed. + DenseMap<SDValue, unsigned>::iterator I = VRBaseMap.find(Op); + if (I==VRBaseMap.end()) + MIB.addReg(0U); // undef + else + AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); + } else if (SD->getKind() == SDDbgValue::CONST) { + const Value *V = SD->getConst(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) { + if (CI->getBitWidth() > 64) + MIB.addCImm(CI); + else + MIB.addImm(CI->getSExtValue()); + } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) { + MIB.addFPImm(CF); + } else { + // Could be an Undef. In any case insert an Undef so we can see what we + // dropped. + MIB.addReg(0U); + } + } else { + // Insert an Undef so we can see what we dropped. + MIB.addReg(0U); + } + + // Indirect addressing is indicated by an Imm as the second parameter. + if (SD->isIndirect()) + MIB.addImm(Offset); + else { + assert(Offset == 0 && "direct value cannot have an offset"); + MIB.addReg(0U, RegState::Debug); + } + + MIB.addMetadata(Var); + MIB.addMetadata(Expr); + + return &*MIB; +} + +/// EmitMachineNode - Generate machine code for a target-specific node and +/// needed dependencies. 
+/// +void InstrEmitter:: +EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + unsigned Opc = Node->getMachineOpcode(); + + // Handle subreg insert/extract specially + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG) { + EmitSubregNode(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + // Handle COPY_TO_REGCLASS specially. + if (Opc == TargetOpcode::COPY_TO_REGCLASS) { + EmitCopyToRegClassNode(Node, VRBaseMap); + return; + } + + // Handle REG_SEQUENCE specially. + if (Opc == TargetOpcode::REG_SEQUENCE) { + EmitRegSequence(Node, VRBaseMap, IsClone, IsCloned); + return; + } + + if (Opc == TargetOpcode::IMPLICIT_DEF) + // We want a unique VR for each IMPLICIT_DEF use. + return; + + const MCInstrDesc &II = TII->get(Opc); + unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const MCPhysReg *ScratchRegs = nullptr; + + // Handle STACKMAP and PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::STACKMAP || Opc == TargetOpcode::PATCHPOINT) { + // Stackmaps do not have arguments and do not preserve their calling + // convention. However, to simplify runtime support, they clobber the same + // scratch registers as AnyRegCC. + unsigned CC = CallingConv::AnyReg; + if (Opc == TargetOpcode::PATCHPOINT) { + CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + } + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + + unsigned NumImpUses = 0; + unsigned NodeOperands = + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=nullptr; +#ifndef NDEBUG + unsigned NumMIOperands = NodeOperands + NumResults; + if (II.isVariadic()) + assert(NumMIOperands >= II.getNumOperands() && + "Too few operands for a variadic node!"); + else + assert(NumMIOperands >= II.getNumOperands() && + NumMIOperands <= II.getNumOperands() + II.getNumImplicitDefs() + + NumImpUses && + "#operands for dag node doesn't match .td file!"); +#endif + + // Create the new machine instruction. + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); + + // Add result register values for things that are defined by this + // instruction. + if (NumResults) + CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); + + // Emit all of the actual operands of this instruction, adding them to the + // instruction as appropriate. + bool HasOptPRefs = NumDefs > NumResults; + assert((!HasOptPRefs || !HasPhysRegOuts) && + "Unable to cope with optional defs and phys regs defs!"); + unsigned NumSkip = HasOptPRefs ? NumDefs - NumResults : 0; + for (unsigned i = NumSkip; i != NodeOperands; ++i) + AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II, + VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); + + // Add scratch registers as implicit def and early clobber + if (ScratchRegs) + for (unsigned i = 0; ScratchRegs[i]; ++i) + MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine | + RegState::EarlyClobber); + + // Transfer all of the memory reference descriptions of this instruction. + MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(), + cast<MachineSDNode>(Node)->memoperands_end()); + + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. 
+ MBB->insert(InsertPos, MIB); + + // The MachineInstr may also define physregs instead of virtregs. These + // physreg values can reach other instructions in different ways: + // + // 1. When there is a use of a Node value beyond the explicitly defined + // virtual registers, we emit a CopyFromReg for one of the implicitly + // defined physregs. This only happens when HasPhysRegOuts is true. + // + // 2. A CopyFromReg reading a physreg may be glued to this instruction. + // + // 3. A glued instruction may implicitly use a physreg. + // + // 4. A glued instruction may use a RegisterSDNode operand. + // + // Collect all the used physreg defs, and make sure that any unused physreg + // defs are marked as dead. + SmallVector<unsigned, 8> UsedRegs; + + // Additional results must be physical register defs. + if (HasPhysRegOuts) { + for (unsigned i = NumDefs; i < NumResults; ++i) { + unsigned Reg = II.getImplicitDefs()[i - NumDefs]; + if (!Node->hasAnyUseOfValue(i)) + continue; + // This implicitly defined physreg has a use. + UsedRegs.push_back(Reg); + EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap); + } + } + + // Scan the glue chain for any used physregs. + if (Node->getValueType(Node->getNumValues()-1) == MVT::Glue) { + for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser()) { + if (F->getOpcode() == ISD::CopyFromReg) { + UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg()); + continue; + } else if (F->getOpcode() == ISD::CopyToReg) { + // Skip CopyToReg nodes that are internal to the glue chain. + continue; + } + // Collect declared implicit uses. + const MCInstrDesc &MCID = TII->get(F->getMachineOpcode()); + UsedRegs.append(MCID.getImplicitUses(), + MCID.getImplicitUses() + MCID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + } + + // Finally mark unused registers as dead. + if (!UsedRegs.empty() || II.getImplicitDefs()) + MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); + + // Run post-isel target hook to adjust this instruction if needed. + if (II.hasPostISelHook()) + TLI->AdjustInstrPostInstrSelection(MIB, Node); +} + +/// EmitSpecialNode - Generate machine code for a target-independent node and +/// needed dependencies. +void InstrEmitter:: +EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + Node->dump(); +#endif + llvm_unreachable("This target-independent node should have been selected!"); + case ISD::EntryToken: + llvm_unreachable("EntryToken should have been excluded from the schedule!"); + case ISD::MERGE_VALUES: + case ISD::TokenFactor: // fall thru + break; + case ISD::CopyToReg: { + unsigned SrcReg; + SDValue SrcVal = Node->getOperand(2); + if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal)) + SrcReg = R->getReg(); + else + SrcReg = getVR(SrcVal, VRBaseMap); + + unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + if (SrcReg == DestReg) // Coalesced away the copy? Ignore. 
+ break; + + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); + break; + } + case ISD::CopyFromReg: { + unsigned SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + EmitCopyFromReg(Node, 0, IsClone, IsCloned, SrcReg, VRBaseMap); + break; + } + case ISD::EH_LABEL: { + MCSymbol *S = cast<EHLabelSDNode>(Node)->getLabel(); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), + TII->get(TargetOpcode::EH_LABEL)).addSym(S); + break; + } + + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: { + unsigned TarOp = (Node->getOpcode() == ISD::LIFETIME_START) ? + TargetOpcode::LIFETIME_START : TargetOpcode::LIFETIME_END; + + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Node->getOperand(1)); + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TarOp)) + .addFrameIndex(FI->getIndex()); + break; + } + + case ISD::INLINEASM: { + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + // Create the inline asm machine instruction. + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); + + // Add the asm string as an external symbol operand. + SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); + const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol(); + MIB.addExternalSymbol(AsmStr); + + // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore + // bits. + int64_t ExtraInfo = + cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))-> + getZExtValue(); + MIB.addImm(ExtraInfo); + + // Remember to operand index of the group flags. + SmallVector<unsigned, 8> GroupIdx; + + // Remember registers that are part of early-clobber defs. + SmallVector<unsigned, 8> ECRegs; + + // Add all of the operand registers to the instruction. + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + GroupIdx.push_back(MIB->getNumOperands()); + MIB.addImm(Flags); + ++i; // Skip the ID value. + + switch (InlineAsm::getKind(Flags)) { + default: llvm_unreachable("Bad flags!"); + case InlineAsm::Kind_RegDef: + for (unsigned j = 0; j != NumVals; ++j, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MIB.addReg(Reg, RegState::Define | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + } + break; + case InlineAsm::Kind_RegDefEarlyClobber: + case InlineAsm::Kind_Clobber: + for (unsigned j = 0; j != NumVals; ++j, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); + ECRegs.push_back(Reg); + } + break; + case InlineAsm::Kind_RegUse: // Use of register. + case InlineAsm::Kind_Imm: // Immediate. + case InlineAsm::Kind_Mem: // Addressing mode. + // The addressing mode has been selected, just add all of the + // operands to the machine instruction. + for (unsigned j = 0; j != NumVals; ++j, ++i) + AddOperand(MIB, Node->getOperand(i), 0, nullptr, VRBaseMap, + /*IsDebug=*/false, IsClone, IsCloned); + + // Manually set isTied bits. 
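+      // Note (added comment, not upstream): a use group may be tied to an
+      // earlier def group; the def group's index is encoded in the use's flag
+      // word, and each register in the use group is tied to the register at
+      // the same position in that def group.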
+ if (InlineAsm::getKind(Flags) == InlineAsm::Kind_RegUse) { + unsigned DefGroup = 0; + if (InlineAsm::isUseOperandTiedToDef(Flags, DefGroup)) { + unsigned DefIdx = GroupIdx[DefGroup] + 1; + unsigned UseIdx = GroupIdx.back() + 1; + for (unsigned j = 0; j != NumVals; ++j) + MIB->tieOperands(DefIdx + j, UseIdx + j); + } + } + break; + } + } + + // GCC inline assembly allows input operands to also be early-clobber + // output operands (so long as the operand is written only after it's + // used), but this does not match the semantics of our early-clobber flag. + // If an early-clobber operand register is also an input operand register, + // then remove the early-clobber flag. + for (unsigned Reg : ECRegs) { + if (MIB->readsRegister(Reg, TRI)) { + MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI); + assert(MO && "No def operand for clobbered register?"); + MO->setIsEarlyClobber(false); + } + } + + // Get the mdnode from the asm if it exists and add it to the instruction. + SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); + const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD(); + if (MD) + MIB.addMetadata(MD); + + MBB->insert(InsertPos, MIB); + break; + } + } +} + +/// InstrEmitter - Construct an InstrEmitter and set it to start inserting +/// at the given position in the given block. +InstrEmitter::InstrEmitter(MachineBasicBlock *mbb, + MachineBasicBlock::iterator insertpos) + : MF(mbb->getParent()), MRI(&MF->getRegInfo()), + TII(MF->getSubtarget().getInstrInfo()), + TRI(MF->getSubtarget().getRegisterInfo()), + TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), + InsertPos(insertpos) {} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h new file mode 100644 index 0000000..3b24d93 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -0,0 +1,145 @@ +//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This declares the Emit routines for the SelectionDAG class, which creates +// MachineInstrs based on the decisions of the SelectionDAG instruction +// selection. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_INSTREMITTER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" + +namespace llvm { + +class MachineInstrBuilder; +class MCInstrDesc; +class SDDbgValue; + +class LLVM_LIBRARY_VISIBILITY InstrEmitter { + MachineFunction *MF; + MachineRegisterInfo *MRI; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + + MachineBasicBlock *MBB; + MachineBasicBlock::iterator InsertPos; + + /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an + /// implicit physical register output. + void EmitCopyFromReg(SDNode *Node, unsigned ResNo, + bool IsClone, bool IsCloned, + unsigned SrcReg, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getDstOfCopyToRegUse - If the only use of the specified result number of + /// node is a CopyToReg, return its destination register. Return 0 otherwise. 
+ unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, + unsigned ResNo) const; + + void CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, + const MCInstrDesc &II, + bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// getVR - Return the virtual register corresponding to the specified result + /// of the specified node. + unsigned getVR(SDValue Op, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// AddRegisterOperand - Add the specified register as an operand to the + /// specified machine instr. Insert register copies if the register is + /// not in the required register class. + void AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// AddOperand - Add the specified operand to the specified machine instr. II + /// specifies the instruction information for the node, and IIOpNum is the + /// operand number (in the II) that we are adding. IIOpNum and II are used for + /// assertions only. + void AddOperand(MachineInstrBuilder &MIB, + SDValue Op, + unsigned IIOpNum, + const MCInstrDesc *II, + DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsDebug, bool IsClone, bool IsCloned); + + /// ConstrainForSubReg - Try to constrain VReg to a register class that + /// supports SubIdx sub-registers. Emit a copy if that isn't possible. + /// Return the virtual register to use. + unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, + MVT VT, DebugLoc DL); + + /// EmitSubregNode - Generate machine code for subreg nodes. + /// + void EmitSubregNode(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); + + /// EmitCopyToRegClassNode - Generate machine code for COPY_TO_REGCLASS nodes. + /// COPY_TO_REGCLASS is just a normal copy, except that the destination + /// register is constrained to be in a particular register class. + /// + void EmitCopyToRegClassNode(SDNode *Node, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitRegSequence - Generate machine code for REG_SEQUENCE nodes. + /// + void EmitRegSequence(SDNode *Node, DenseMap<SDValue, unsigned> &VRBaseMap, + bool IsClone, bool IsCloned); +public: + /// CountResults - The results of target nodes have register or immediate + /// operands first, then an optional chain, and optional flag operands + /// (which do not go into the machine instrs.) + static unsigned CountResults(SDNode *Node); + + /// EmitDbgValue - Generate machine instruction for a dbg_value node. + /// + MachineInstr *EmitDbgValue(SDDbgValue *SD, + DenseMap<SDValue, unsigned> &VRBaseMap); + + /// EmitNode - Generate machine code for a node and needed dependencies. + /// + void EmitNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap) { + if (Node->isMachineOpcode()) + EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap); + else + EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap); + } + + /// getBlock - Return the current basic block. + MachineBasicBlock *getBlock() { return MBB; } + + /// getInsertPos - Return the current insertion position. + MachineBasicBlock::iterator getInsertPos() { return InsertPos; } + + /// InstrEmitter - Construct an InstrEmitter and set it to start inserting + /// at the given position in the given block. 
+ InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos); + +private: + void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); + void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, + DenseMap<SDValue, unsigned> &VRBaseMap); +}; + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp new file mode 100644 index 0000000..f46767f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -0,0 +1,4671 @@ +//===-- LegalizeDAG.cpp - Implement SelectionDAG::Legalize ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::Legalize method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "legalizedag" + +namespace { + +struct FloatSignAsInt; + +//===----------------------------------------------------------------------===// +/// This takes an arbitrary SelectionDAG as input and +/// hacks on it until the target machine can handle it. This involves +/// eliminating value sizes the machine cannot handle (promoting small sizes to +/// large sizes or splitting up large values into small values) as well as +/// eliminating operations the machine cannot handle. +/// +/// This code also does a small amount of optimization and recognition of idioms +/// as part of its processing. For example, if a target does not support a +/// 'setcc' instruction efficiently, but does support 'brcc' instruction, this +/// will attempt merge setcc and brc instructions into brcc's. +/// +class SelectionDAGLegalize { + const TargetMachine &TM; + const TargetLowering &TLI; + SelectionDAG &DAG; + + /// \brief The set of nodes which have already been legalized. We hold a + /// reference to it in order to update as necessary on node deletion. + SmallPtrSetImpl<SDNode *> &LegalizedNodes; + + /// \brief A set of all the nodes updated during legalization. + SmallSetVector<SDNode *, 16> *UpdatedNodes; + + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + } + + // Libcall insertion helpers. 
+ +public: + SelectionDAGLegalize(SelectionDAG &DAG, + SmallPtrSetImpl<SDNode *> &LegalizedNodes, + SmallSetVector<SDNode *, 16> *UpdatedNodes = nullptr) + : TM(DAG.getTarget()), TLI(DAG.getTargetLoweringInfo()), DAG(DAG), + LegalizedNodes(LegalizedNodes), UpdatedNodes(UpdatedNodes) {} + + /// \brief Legalizes the given operation. + void LegalizeOp(SDNode *Node); + +private: + SDValue OptimizeFloatStore(StoreSDNode *ST); + + void LegalizeLoadOps(SDNode *Node); + void LegalizeStoreOps(SDNode *Node); + + /// Some targets cannot handle a variable + /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it + /// is necessary to spill the vector being inserted into to memory, perform + /// the insert there, and then read the result back. + SDValue PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, + SDValue Idx, SDLoc dl); + SDValue ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, + SDValue Idx, SDLoc dl); + + /// Return a vector shuffle operation which + /// performs the same shuffe in terms of order or result bytes, but on a type + /// whose vector element type is narrower than the original shuffle type. + /// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> + SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, + SDValue N1, SDValue N2, + ArrayRef<int> Mask) const; + + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, + bool &NeedInvert, SDLoc dl); + + SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, SDLoc dl); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128); + SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results); + + SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl); + SDValue ExpandBUILD_VECTOR(SDNode *Node); + SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); + void ExpandDYNAMIC_STACKALLOC(SDNode *Node, + SmallVectorImpl<SDValue> &Results); + void getSignAsIntValue(FloatSignAsInt &State, SDLoc DL, SDValue Value) const; + SDValue modifySignAsInt(const FloatSignAsInt &State, SDLoc DL, + SDValue NewIntValue) const; + SDValue ExpandFCOPYSIGN(SDNode *Node) const; + SDValue ExpandFABS(SDNode *Node) const; + SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT, + SDLoc dl); + SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned, + SDLoc dl); + SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned, + SDLoc dl); + + SDValue ExpandBITREVERSE(SDValue Op, SDLoc dl); + SDValue ExpandBSWAP(SDValue Op, SDLoc dl); + SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl); + + SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandInsertToVectorThroughStack(SDValue Op); + SDValue ExpandVectorBuildThroughStack(SDNode* Node); + + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); + + // if ExpandNode returns false, LegalizeOp falls back to 
ConvertNodeToLibcall + bool ExpandNode(SDNode *Node); + void ConvertNodeToLibcall(SDNode *Node); + void PromoteNode(SDNode *Node); + +public: + // Node replacement helpers + void ReplacedNode(SDNode *N) { + LegalizedNodes.erase(N); + if (UpdatedNodes) + UpdatedNodes->insert(N); + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + assert(Old->getNumValues() == New->getNumValues() && + "Replacing one node with another that produces a different number " + "of values!"); + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Old, i), SDValue(New, i)); + if (UpdatedNodes) + UpdatedNodes->insert(New); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG); + dbgs() << " with: "; New->dump(&DAG)); + + DAG.ReplaceAllUsesWith(Old, New); + DAG.TransferDbgValues(Old, New); + if (UpdatedNodes) + UpdatedNodes->insert(New.getNode()); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG)); + + DAG.ReplaceAllUsesWith(Old, New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) { + DEBUG(dbgs() << (i == 0 ? " with: " + : " and: "); + New[i]->dump(&DAG)); + DAG.TransferDbgValues(SDValue(Old, i), New[i]); + if (UpdatedNodes) + UpdatedNodes->insert(New[i].getNode()); + } + ReplacedNode(Old); + } +}; +} + +/// Return a vector shuffle operation which +/// performs the same shuffe in terms of order or result bytes, but on a type +/// whose vector element type is narrower than the original shuffle type. +/// e.g. <v4i32> <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl, + SDValue N1, SDValue N2, + ArrayRef<int> Mask) const { + unsigned NumMaskElts = VT.getVectorNumElements(); + unsigned NumDestElts = NVT.getVectorNumElements(); + unsigned NumEltsGrowth = NumDestElts / NumMaskElts; + + assert(NumEltsGrowth && "Cannot promote to vector type with fewer elts!"); + + if (NumEltsGrowth == 1) + return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); + + SmallVector<int, 8> NewMask; + for (unsigned i = 0; i != NumMaskElts; ++i) { + int Idx = Mask[i]; + for (unsigned j = 0; j != NumEltsGrowth; ++j) { + if (Idx < 0) + NewMask.push_back(-1); + else + NewMask.push_back(Idx * NumEltsGrowth + j); + } + } + assert(NewMask.size() == NumDestElts && "Non-integer NumEltsGrowth?"); + assert(TLI.isShuffleMaskLegal(NewMask, NVT) && "Shuffle not legal?"); + return DAG.getVectorShuffle(NVT, dl, N1, N2, &NewMask[0]); +} + +/// Expands the ConstantFP node to an integer constant or +/// a load from the constant pool. +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { + bool Extend = false; + SDLoc dl(CFP); + + // If a FP immediate is precise when represented as a float and if the + // target can do an extending load from float to double, we put it into + // the constant pool as a float, even if it's is statically typed as a + // double. This shrinks FP constants and canonicalizes them for targets where + // an FP extending load is the same cost as a normal load (such as on the x87 + // fp stack or PPC FP unit). 
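+  // Note (added comment, not upstream): for example, a double constant whose
+  // value is exactly representable as a float may be emitted as a 4-byte
+  // constant-pool entry and widened with an f32->f64 extending load, provided
+  // isLoadExtLegal and ShouldShrinkFPConstant below agree.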
+ EVT VT = CFP->getValueType(0); + ConstantFP *LLVMC = const_cast<ConstantFP*>(CFP->getConstantFPValue()); + if (!UseCP) { + assert((VT == MVT::f64 || VT == MVT::f32) && "Invalid type expansion"); + return DAG.getConstant(LLVMC->getValueAPF().bitcastToAPInt(), dl, + (VT == MVT::f64) ? MVT::i64 : MVT::i32); + } + + EVT OrigVT = VT; + EVT SVT = VT; + while (SVT != MVT::f32 && SVT != MVT::f16) { + SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && + // Only do this if the target has a native EXTLOAD instruction from + // smaller type. + TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) && + TLI.ShouldShrinkFPConstant(OrigVT)) { + Type *SType = SVT.getTypeForEVT(*DAG.getContext()); + LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType)); + VT = SVT; + Extend = true; + } + } + + SDValue CPIdx = + DAG.getConstantPool(LLVMC, TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + if (Extend) { + SDValue Result = DAG.getExtLoad( + ISD::EXTLOAD, dl, OrigVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), VT, + false, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + +/// Expands an unaligned store to 2 half-size stores. +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAGLegalize *DAGLegalize) { + assert(ST->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed stores not implemented!"); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDValue Val = ST->getValue(); + EVT VT = Val.getValueType(); + int Alignment = ST->getAlignment(); + unsigned AS = ST->getAddressSpace(); + + SDLoc dl(ST); + if (ST->getMemoryVT().isFloatingPoint() || + ST->getMemoryVT().isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + if (TLI.isTypeLegal(intVT)) { + // Expand to a bitconvert of the value to the integer type of the + // same size, then a (misaligned) int store. + // FIXME: Does not handle truncating floating point stores! + SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; + } + // Do a (aligned) store to a stack slot, then copy from the stack slot + // to the final destination using (unaligned) integer loads and stores. 
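+    // Note (added comment, not upstream): the sequence below spills the value
+    // to an aligned stack temporary, then copies it to the real destination in
+    // register-sized integer pieces, finishing with a truncating store for any
+    // leftover bytes.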
+ EVT StoredVT = ST->getMemoryVT(); + MVT RegVT = + TLI.getRegisterType(*DAG.getContext(), + EVT::getIntegerVT(*DAG.getContext(), + StoredVT.getSizeInBits())); + unsigned StoredBytes = StoredVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT); + + // Perform the original store, only redirected to the stack slot. + SDValue Store = DAG.getTruncStore(Chain, dl, + Val, StackPtr, MachinePointerInfo(), + StoredVT, false, false, 0); + SDValue Increment = DAG.getConstant( + RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout(), AS)); + SmallVector<SDValue, 8> Stores; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the stack slot. + SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, + MachinePointerInfo(), + false, false, false, 0); + // Store it to the final location. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo().getWithOffset(Offset), + ST->isVolatile(), ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset))); + // Increment the pointers. + Offset += RegBytes; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + } + + // The last store may be partial. Do a truncating store. On big-endian + // machines this requires an extending load from the stack slot to ensure + // that the bits are in the right place. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (StoredBytes - Offset)); + + // Load from the stack slot. + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + MachinePointerInfo(), + MemVT, false, false, false, 0); + + Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, + ST->getPointerInfo() + .getWithOffset(Offset), + MemVT, ST->isVolatile(), + ST->isNonTemporal(), + MinAlign(ST->getAlignment(), Offset), + ST->getAAInfo())); + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); + return; + } + assert(ST->getMemoryVT().isInteger() && + !ST->getMemoryVT().isVector() && + "Unaligned store of unknown type."); + // Get the half-size VT + EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext()); + int NumBits = NewStoredVT.getSizeInBits(); + int IncrementSize = NumBits / 8; + + // Divide the stored value in two parts. + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Val.getValueType(), + DAG.getDataLayout())); + SDValue Lo = Val; + SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); + + // Store the two parts + SDValue Store1, Store2; + Store1 = DAG.getTruncStore(Chain, dl, + DAG.getDataLayout().isLittleEndian() ? Lo : Hi, + Ptr, ST->getPointerInfo(), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + TLI.getPointerTy(DAG.getDataLayout(), AS))); + Alignment = MinAlign(Alignment, IncrementSize); + Store2 = DAG.getTruncStore( + Chain, dl, DAG.getDataLayout().isLittleEndian() ? 
Hi : Lo, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, + ST->isVolatile(), ST->isNonTemporal(), Alignment, ST->getAAInfo()); + + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); +} + +/// Expands an unaligned load to 2 half-size loads. +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { + assert(LD->getAddressingMode() == ISD::UNINDEXED && + "unaligned indexed loads not implemented!"); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + EVT VT = LD->getValueType(0); + EVT LoadedVT = LD->getMemoryVT(); + SDLoc dl(LD); + if (VT.isFloatingPoint() || VT.isVector()) { + EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits()); + if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) { + // Expand to a (misaligned) integer load of the same size, + // then bitconvert to floating point or vector. + SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, + LD->getMemOperand()); + SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); + if (LoadedVT != VT) + Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND : + ISD::ANY_EXTEND, dl, VT, Result); + + ValResult = Result; + ChainResult = newLoad.getValue(1); + return; + } + + // Copy the value to a (aligned) stack slot using (unaligned) integer + // loads and stores, then do a (aligned) load from the stack slot. + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; + unsigned RegBytes = RegVT.getSizeInBits() / 8; + unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; + + // Make sure the stack slot is also aligned for the register type. + SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT); + + SDValue Increment = + DAG.getConstant(RegBytes, dl, TLI.getPointerTy(DAG.getDataLayout())); + SmallVector<SDValue, 8> Stores; + SDValue StackPtr = StackBase; + unsigned Offset = 0; + + // Do all but one copies using the full register width. + for (unsigned i = 1; i < NumRegs; i++) { + // Load one integer register's worth from the original location. + SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store. + Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), false, false, 0)); + // Increment the pointers. + Offset += RegBytes; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + Increment); + } + + // The last copy may be partial. Do an extending load. + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), + 8 * (LoadedBytes - Offset)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(Offset), + MemVT, LD->isVolatile(), + LD->isNonTemporal(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); + // Follow the load with a store to the stack slot. Remember the store. + // On big-endian machines this requires a truncating store to ensure + // that the bits end up in the right place. 
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr, + MachinePointerInfo(), MemVT, + false, false, 0)); + + // The order of the stores doesn't matter - say it with a TokenFactor. + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + + // Finally, perform the original load only redirected to the stack slot. + Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + MachinePointerInfo(), LoadedVT, false,false, false, + 0); + + // Callers expect a MERGE_VALUES node. + ValResult = Load; + ChainResult = TF; + return; + } + assert(LoadedVT.isInteger() && !LoadedVT.isVector() && + "Unaligned load of unsupported type."); + + // Compute the new VT that is half the size of the old one. This is an + // integer MVT. + unsigned NumBits = LoadedVT.getSizeInBits(); + EVT NewLoadedVT; + NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2); + NumBits >>= 1; + + unsigned Alignment = LD->getAlignment(); + unsigned IncrementSize = NumBits / 8; + ISD::LoadExtType HiExtType = LD->getExtensionType(); + + // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD. + if (HiExtType == ISD::NON_EXTLOAD) + HiExtType = ISD::ZEXTLOAD; + + // Load the value in two parts + SDValue Lo, Hi; + if (DAG.getDataLayout().isLittleEndian()) { + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(),LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); + } else { + Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), Alignment, + LD->getAAInfo()); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + NewLoadedVT, LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), + MinAlign(Alignment, IncrementSize), LD->getAAInfo()); + } + + // aggregate the two parts + SDValue ShiftAmount = + DAG.getConstant(NumBits, dl, TLI.getShiftAmountTy(Hi.getValueType(), + DAG.getDataLayout())); + SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); + Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); + + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + ValResult = Result; + ChainResult = TF; +} + +/// Some target cannot handle a variable insertion index for the +/// INSERT_VECTOR_ELT instruction. In this case, it +/// is necessary to spill the vector being inserted into to memory, perform +/// the insert there, and then read the result back. +SDValue SelectionDAGLegalize:: +PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, + SDLoc dl) { + SDValue Tmp1 = Vec; + SDValue Tmp2 = Val; + SDValue Tmp3 = Idx; + + // If the target doesn't support this, we have to spill the input vector + // to a temporary stack slot, update the element, then reload it. This is + // badness. 
We could also load the value into a vector register (either + // with a "move to register" or "extload into register" instruction, then + // permute it into place, if the idx is a constant and if the idx is + // supported by the target. + EVT VT = Tmp1.getValueType(); + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = Tmp3.getValueType(); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + SDValue StackPtr = DAG.CreateStackTemporary(VT); + + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + + // Store the vector. + SDValue Ch = DAG.getStore( + DAG.getEntryNode(), dl, Tmp1, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, 0); + + // Truncate or zero extend offset to target pointer type. + Tmp3 = DAG.getZExtOrTrunc(Tmp3, dl, PtrVT); + // Add the offset to the index. + unsigned EltSize = EltVT.getSizeInBits()/8; + Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3, + DAG.getConstant(EltSize, dl, IdxVT)); + SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr); + // Store the scalar value. + Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT, + false, false, 0); + // Load the updated vector. + return DAG.getLoad(VT, dl, Ch, StackPtr, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), SPFI), + false, false, false, 0); +} + + +SDValue SelectionDAGLegalize:: +ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) { + if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) { + // SCALAR_TO_VECTOR requires that the type of the value being inserted + // match the element type of the vector being created, except for + // integers in which case the inserted value can be over width. + EVT EltVT = Vec.getValueType().getVectorElementType(); + if (Val.getValueType() == EltVT || + (EltVT.isInteger() && Val.getValueType().bitsGE(EltVT))) { + SDValue ScVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, + Vec.getValueType(), Val); + + unsigned NumElts = Vec.getValueType().getVectorNumElements(); + // We generate a shuffle of InVec and ScVec, so the shuffle mask + // should be 0,1,2,3,4,5... with the appropriate element replaced with + // elt 0 of the RHS. + SmallVector<int, 8> ShufOps; + for (unsigned i = 0; i != NumElts; ++i) + ShufOps.push_back(i != InsertPos->getZExtValue() ? i : NumElts); + + return DAG.getVectorShuffle(Vec.getValueType(), dl, Vec, ScVec, + &ShufOps[0]); + } + } + return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); +} + +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDLoc dl(ST); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + TLI.isTypeLegal(MVT::i32)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF(). 
+ bitcastToAPInt().zextOrTrunc(32), + SDLoc(CFP), MVT::i32); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment, AAInfo); + } + + if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (TLI.isTypeLegal(MVT::i64)) { + SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), SDLoc(CFP), MVT::i64); + return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment, AAInfo); + } + + if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. + const APInt &IntVal = CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(IntVal.trunc(32), dl, MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), dl, MVT::i32); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment, AAInfo); + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(4, dl, Ptr.getValueType())); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(4), + isVolatile, isNonTemporal, MinAlign(Alignment, 4U), + AAInfo); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(nullptr, 0); +} + +void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { + StoreSDNode *ST = cast<StoreSDNode>(Node); + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + SDLoc dl(Node); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + if (!ST->isTruncatingStore()) { + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + ReplaceNode(ST, OptStore); + return; + } + + { + SDValue Value = ST->getValue(); + MVT VT = Value.getSimpleValueType(); + switch (TLI.getOperationAction(ISD::STORE, VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + // If this is an unaligned store and the target doesn't support it, + // expand it. 
+ EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + return; + } + } else { + SDValue Value = ST->getValue(); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + auto &DL = DAG.getDataLayout(); + + if (StWidth != StVT.getStoreSizeInBits()) { + // Promote to a byte-sized store with upper bits zero if not + // storing an integral number of bytes. For example, promote + // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1) + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + StVT.getStoreSizeInBits()); + Value = DAG.getZeroExtendInReg(Value, dl, StVT); + SDValue Result = + DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + } else if (StWidth & (StWidth - 1)) { + // If not storing a power-of-2 number of bits, expand as two stores. + assert(!StVT.isVector() && "Unsupported truncstore!"); + unsigned RoundWidth = 1 << Log2_32(StWidth); + assert(RoundWidth < StWidth); + unsigned ExtraWidth = StWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Store size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi; + unsigned IncrementSize; + + if (DL.isLittleEndian()) { + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16) + // Store the bottom RoundWidth bits. + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + RoundVT, + isVolatile, isNonTemporal, Alignment, + AAInfo); + + // Store the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize), AAInfo); + } else { + // Big endian - avoid unaligned stores. + // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X + // Store the top RoundWidth bits. + Hi = DAG.getNode( + ISD::SRL, dl, Value.getValueType(), Value, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Value.getValueType(), DL))); + Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(), + RoundVT, isVolatile, isNonTemporal, Alignment, + AAInfo); + + // Store the remaining ExtraWidth bits. 
+ IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, + ST->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize), AAInfo); + } + + // The order of the stores doesn't matter. + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + ReplaceNode(SDValue(Node, 0), Result); + } else { + switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = ST->getMemoryVT(); + unsigned AS = ST->getAddressSpace(); + unsigned Align = ST->getAlignment(); + // If this is an unaligned store and the target doesn't support it, + // expand it. + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedStore(cast<StoreSDNode>(Node), DAG, TLI, this); + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res && Res != SDValue(Node, 0)) + ReplaceNode(SDValue(Node, 0), Res); + return; + } + case TargetLowering::Expand: + assert(!StVT.isVector() && + "Vector Stores are handled in LegalizeVectorOps"); + + // TRUNCSTORE:i16 i32 -> STORE i16 + assert(TLI.isTypeLegal(StVT) && + "Do not know how to expand this store!"); + Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value); + SDValue Result = + DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment, AAInfo); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + } + } +} + +void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { + LoadSDNode *LD = cast<LoadSDNode>(Node); + SDValue Chain = LD->getChain(); // The chain. + SDValue Ptr = LD->getBasePtr(); // The base pointer. + SDValue Value; // The value returned by the load op. + SDLoc dl(Node); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (ExtType == ISD::NON_EXTLOAD) { + MVT VT = Node->getSimpleValueType(0); + SDValue RVal = SDValue(Node, 0); + SDValue RChain = SDValue(Node, 1); + + switch (TLI.getOperationAction(Node->getOpcode(), VT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: { + EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + // If this is an unaligned load and the target doesn't support it, + // expand it. 
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, RVal, RChain); + break; + } + case TargetLowering::Custom: { + SDValue Res = TLI.LowerOperation(RVal, DAG); + if (Res.getNode()) { + RVal = Res; + RChain = Res.getValue(1); + } + break; + } + case TargetLowering::Promote: { + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote loads to same size type"); + + SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand()); + RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res); + RChain = Res.getValue(1); + break; + } + } + if (RChain.getNode() != Node) { + assert(RVal.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), RVal); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), RChain); + if (UpdatedNodes) { + UpdatedNodes->insert(RVal.getNode()); + UpdatedNodes->insert(RChain.getNode()); + } + ReplacedNode(Node); + } + return; + } + + EVT SrcVT = LD->getMemoryVT(); + unsigned SrcWidth = SrcVT.getSizeInBits(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + if (SrcWidth != SrcVT.getStoreSizeInBits() && + // Some targets pretend to have an i1 loading operation, and actually + // load an i8. This trick is correct for ZEXTLOAD because the top 7 + // bits are guaranteed to be zero; it helps the optimizers understand + // that these bits are zero. It is also useful for EXTLOAD, since it + // tells the optimizers that those bits are undefined. It would be + // nice to have an effective generic way of getting these benefits... + // Until such a way is found, don't insist on promoting i1 here. + (SrcVT != MVT::i1 || + TLI.getLoadExtAction(ExtType, Node->getValueType(0), MVT::i1) == + TargetLowering::Promote)) { + // Promote to a byte-sized load if not loading an integral number of + // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24. + unsigned NewWidth = SrcVT.getStoreSizeInBits(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth); + SDValue Ch; + + // The extra bits are guaranteed to be zero, since we stored them that + // way. A zext load from NVT thus automatically gives zext from SrcVT. + + ISD::LoadExtType NewExtType = + ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; + + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Chain, Ptr, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + Ch = Result.getValue(1); // The chain. + + if (ExtType == ISD::SEXTLOAD) + // Having the top bits zero doesn't help when sign extending. + Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType()) + // All the top bits are guaranteed to be zero - inform the optimizers. + Result = DAG.getNode(ISD::AssertZext, dl, + Result.getValueType(), Result, + DAG.getValueType(SrcVT)); + + Value = Result; + Chain = Ch; + } else if (SrcWidth & (SrcWidth - 1)) { + // If not loading a power-of-2 number of bits, expand as two loads. 
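+    // Note (added comment, not upstream): e.g. an extending i24 load is split
+    // into an i16 load plus an i8 load of the remaining byte; the two pieces
+    // are shifted and OR'd back together, and which piece is loaded first
+    // depends on endianness.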
+ assert(!SrcVT.isVector() && "Unsupported extload!"); + unsigned RoundWidth = 1 << Log2_32(SrcWidth); + assert(RoundWidth < SrcWidth); + unsigned ExtraWidth = SrcWidth - RoundWidth; + assert(ExtraWidth < RoundWidth); + assert(!(RoundWidth % 8) && !(ExtraWidth % 8) && + "Load size not an integral number of bytes!"); + EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth); + EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth); + SDValue Lo, Hi, Ch; + unsigned IncrementSize; + auto &DL = DAG.getDataLayout(); + + if (DL.isLittleEndian()) { + // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) + // Load the bottom RoundWidth bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), + Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, isInvariant, Alignment, AAInfo); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(RoundWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } else { + // Big endian - avoid unaligned loads. + // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 + // Load the top RoundWidth bits. + Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo(), RoundVT, isVolatile, + isNonTemporal, isInvariant, Alignment, AAInfo); + + // Load the remaining ExtraWidth bits. + IncrementSize = RoundWidth / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, + Ptr.getValueType())); + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + dl, Node->getValueType(0), Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + ExtraVT, isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); + + // Build a factor node to remember that this load is independent of + // the other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Move the top bits to the right place. + Hi = DAG.getNode( + ISD::SHL, dl, Hi.getValueType(), Hi, + DAG.getConstant(ExtraWidth, dl, + TLI.getShiftAmountTy(Hi.getValueType(), DL))); + + // Join the hi and lo parts. + Value = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + } + + Chain = Ch; + } else { + bool isCustom = false; + switch (TLI.getLoadExtAction(ExtType, Node->getValueType(0), + SrcVT.getSimpleVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Custom: + isCustom = true; + // FALLTHROUGH + case TargetLowering::Legal: { + Value = SDValue(Node, 0); + Chain = SDValue(Node, 1); + + if (isCustom) { + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + Value = Res; + Chain = Res.getValue(1); + } + } else { + // If this is an unaligned load and the target doesn't support it, + // expand it. 
+ EVT MemVT = LD->getMemoryVT(); + unsigned AS = LD->getAddressSpace(); + unsigned Align = LD->getAlignment(); + const DataLayout &DL = DAG.getDataLayout(); + if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) + ExpandUnalignedLoad(cast<LoadSDNode>(Node), DAG, TLI, Value, Chain); + } + break; + } + case TargetLowering::Expand: + EVT DestVT = Node->getValueType(0); + if (!TLI.isLoadExtLegal(ISD::EXTLOAD, DestVT, SrcVT)) { + // If the source type is not legal, see if there is a legal extload to + // an intermediate type that we can then extend further. + EVT LoadVT = TLI.getRegisterType(SrcVT.getSimpleVT()); + if (TLI.isTypeLegal(SrcVT) || // Same as SrcVT == LoadVT? + TLI.isLoadExtLegal(ExtType, LoadVT, SrcVT)) { + // If we are loading a legal type, this is a non-extload followed by a + // full extend. + ISD::LoadExtType MidExtType = + (LoadVT == SrcVT) ? ISD::NON_EXTLOAD : ExtType; + + SDValue Load = DAG.getExtLoad(MidExtType, dl, LoadVT, Chain, Ptr, + SrcVT, LD->getMemOperand()); + unsigned ExtendOp = + ISD::getExtForLoadExtType(SrcVT.isFloatingPoint(), ExtType); + Value = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Chain = Load.getValue(1); + break; + } + + // Handle the special case of fp16 extloads. EXTLOAD doesn't have the + // normal undefined upper bits behavior to allow using an in-reg extend + // with the illegal FP type, so load as an integer and do the + // from-integer conversion. + if (SrcVT.getScalarType() == MVT::f16) { + EVT ISrcVT = SrcVT.changeTypeToInteger(); + EVT IDestVT = DestVT.changeTypeToInteger(); + EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT()); + + SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT, + Chain, Ptr, ISrcVT, + LD->getMemOperand()); + Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result); + Chain = Result.getValue(1); + break; + } + } + + assert(!SrcVT.isVector() && + "Vector Loads are handled in LegalizeVectorOps"); + + // FIXME: This does not work for vectors on most targets. Sign- + // and zero-extend operations are currently folded into extending + // loads, whether they are legal or not, and then we end up here + // without any support for legalizing them. + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); + // Turn the unsupported load into an EXTLOAD followed by an + // explicit zero/sign extend inreg. + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, + Node->getValueType(0), + Chain, Ptr, SrcVT, + LD->getMemOperand()); + SDValue ValRes; + if (ExtType == ISD::SEXTLOAD) + ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, + Result.getValueType(), + Result, DAG.getValueType(SrcVT)); + else + ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); + Value = ValRes; + Chain = Result.getValue(1); + break; + } + } + + // Since loads produce two values, make sure to remember that we legalized + // both of them. + if (Chain.getNode() != Node) { + assert(Value.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Value); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Value.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } + ReplacedNode(Node); + } +} + +/// Return a legal replacement for the given operation, with all legal operands. +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG)); + + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
+ return; + +#ifndef NDEBUG + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && + "Unexpected illegal type!"); + + for (const SDValue &Op : Node->op_values()) + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); +#endif + + // Figure out the correct action; the way to query this varies by opcode + TargetLowering::LegalizeAction Action = TargetLowering::Legal; + bool SimpleFinishLegalizing = true; + switch (Node->getOpcode()) { + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::STACKSAVE: + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + break; + case ISD::VAARG: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + if (Action != TargetLowering::Promote) + Action = TLI.getOperationAction(Node->getOpcode(), MVT::Other); + break; + case ISD::FP_TO_FP16: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::EXTRACT_VECTOR_ELT: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: { + EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Action = TLI.getOperationAction(Node->getOpcode(), InnerType); + break; + } + case ISD::ATOMIC_STORE: { + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(2).getValueType()); + break; + } + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::BR_CC: { + unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : + Node->getOpcode() == ISD::SETCC ? 2 : + Node->getOpcode() == ISD::SETCCE ? 3 : 1; + unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 2 : 0; + MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(CCOperand))->get(); + Action = TLI.getCondCodeAction(CCCode, OpVT); + if (Action == TargetLowering::Legal) { + if (Node->getOpcode() == ISD::SELECT_CC) + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getValueType(0)); + else + Action = TLI.getOperationAction(Node->getOpcode(), OpVT); + } + break; + } + case ISD::LOAD: + case ISD::STORE: + // FIXME: Model these properly. LOAD and STORE are complicated, and + // STORE expects the unlegalized operand in some cases. + SimpleFinishLegalizing = false; + break; + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + // FIXME: This shouldn't be necessary. These nodes have special properties + // dealing with the recursive nature of legalization. Removing this + // special case should be done as part of making LegalizeDAG non-recursive. + SimpleFinishLegalizing = false; + break; + case ISD::EXTRACT_ELEMENT: + case ISD::FLT_ROUNDS_: + case ISD::FPOWI: + case ISD::MERGE_VALUES: + case ISD::EH_RETURN: + case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: + case ISD::EH_SJLJ_SETUP_DISPATCH: + // These operations lie about being legal: when they claim to be legal, + // they should actually be expanded. 
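+    // A Legal answer from the target is therefore overridden below, so these
+    // nodes are routed to ExpandNode rather than being left as-is.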
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Expand; + break; + case ISD::INIT_TRAMPOLINE: + case ISD::ADJUST_TRAMPOLINE: + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + // These operations lie about being legal: when they claim to be legal, + // they should actually be custom-lowered. + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Legal) + Action = TargetLowering::Custom; + break; + case ISD::READCYCLECOUNTER: + // READCYCLECOUNTER returns an i64, even if type legalization might have + // expanded that to several smaller types. + Action = TLI.getOperationAction(Node->getOpcode(), MVT::i64); + break; + case ISD::READ_REGISTER: + case ISD::WRITE_REGISTER: + // Named register is legal in the DAG, but blocked by register name + // selection if not implemented by target (to chose the correct register) + // They'll be converted to Copy(To/From)Reg. + Action = TargetLowering::Legal; + break; + case ISD::DEBUGTRAP: + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + if (Action == TargetLowering::Expand) { + // replace ISD::DEBUGTRAP with ISD::TRAP + SDValue NewVal; + NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(), + Node->getOperand(0)); + ReplaceNode(Node, NewVal.getNode()); + LegalizeOp(NewVal.getNode()); + return; + } + break; + + default: + if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { + Action = TargetLowering::Legal; + } else { + Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); + } + break; + } + + if (SimpleFinishLegalizing) { + SDNode *NewNode = Node; + switch (Node->getOpcode()) { + default: break; + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::ROTL: + case ISD::ROTR: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Node->getOperand(1).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(1)); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Handle.getValue()); + } + break; + case ISD::SRL_PARTS: + case ISD::SRA_PARTS: + case ISD::SHL_PARTS: + // Legalizing shifts/rotates requires adjusting the shift amount + // to the appropriate width. + if (!Node->getOperand(2).getValueType().isVector()) { + SDValue SAO = + DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(), + Node->getOperand(2)); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0), + Node->getOperand(1), + Handle.getValue()); + } + break; + } + + if (NewNode != Node) { + ReplaceNode(Node, NewNode); + Node = NewNode; + } + switch (Action) { + case TargetLowering::Legal: + return; + case TargetLowering::Custom: { + // FIXME: The handling for custom lowering with multiple results is + // a complete mess. + SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Res.getNode()) { + if (!(Res.getNode() != Node || Res.getResNo() != 0)) + return; + + if (Node->getNumValues() == 1) { + // We can just directly replace this node with the lowered value. 
+ ReplaceNode(SDValue(Node, 0), Res); + return; + } + + SmallVector<SDValue, 8> ResultVals; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Res.getValue(i)); + ReplaceNode(Node, ResultVals.data()); + return; + } + } + // FALL THROUGH + case TargetLowering::Expand: + if (ExpandNode(Node)) + return; + // FALL THROUGH + case TargetLowering::LibCall: + ConvertNodeToLibcall(Node); + return; + case TargetLowering::Promote: + PromoteNode(Node); + return; + } + } + + switch (Node->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "NODE: "; + Node->dump( &DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to legalize this operator!"); + + case ISD::CALLSEQ_START: + case ISD::CALLSEQ_END: + break; + case ISD::LOAD: { + return LegalizeLoadOps(Node); + } + case ISD::STORE: { + return LegalizeStoreOps(Node); + } + } +} + +SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { + SDValue Vec = Op.getOperand(0); + SDValue Idx = Op.getOperand(1); + SDLoc dl(Op); + + // Before we generate a new store to a temporary stack slot, see if there is + // already one that we can use. There often is because when we scalarize + // vector operations (using SelectionDAG::UnrollVectorOp for example) a whole + // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in + // the vector. If all are expanded here, we don't want one store per vector + // element. + + // Caches for hasPredecessorHelper + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + + SDValue StackPtr, Ch; + for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), + UE = Vec.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (StoreSDNode *ST = dyn_cast<StoreSDNode>(User)) { + if (ST->isIndexed() || ST->isTruncatingStore() || + ST->getValue() != Vec) + continue; + + // Make sure that nothing else could have stored into the destination of + // this store. + if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) + continue; + + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + + StackPtr = ST->getBasePtr(); + Ch = SDValue(ST, 0); + break; + } + } + + if (!Ch.getNode()) { + // Store the value to a temporary stack slot, then LOAD the returned part. + StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + } + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); + + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); + StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr); + + SDValue NewLoad; + + if (Op.getValueType().isVector()) + NewLoad = DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, + MachinePointerInfo(), false, false, false, 0); + else + NewLoad = DAG.getExtLoad( + ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), + Vec.getValueType().getVectorElementType(), false, false, false, 0); + + // Replace the chain going out of the store, by the one out of the load. 
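+  // That is, every user of the reused store's chain is redirected to the new
+  // load's chain, so those users are now ordered after the load as well.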
+ DAG.ReplaceAllUsesOfValueWith(Ch, SDValue(NewLoad.getNode(), 1)); + + // We introduced a cycle though, so update the loads operands, making sure + // to use the original store's chain as an incoming chain. + SmallVector<SDValue, 6> NewLoadOperands(NewLoad->op_begin(), + NewLoad->op_end()); + NewLoadOperands[0] = Ch; + NewLoad = + SDValue(DAG.UpdateNodeOperands(NewLoad.getNode(), NewLoadOperands), 0); + return NewLoad; +} + +SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { + assert(Op.getValueType().isVector() && "Non-vector insert subvector!"); + + SDValue Vec = Op.getOperand(0); + SDValue Part = Op.getOperand(1); + SDValue Idx = Op.getOperand(2); + SDLoc dl(Op); + + // Store the value to a temporary stack slot, then LOAD the returned part. + + SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType()); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); + + // First store the whole vector. + SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, + false, false, 0); + + // Then store the inserted part. + + // Add the offset to the index. + unsigned EltSize = + Vec.getValueType().getVectorElementType().getSizeInBits()/8; + + Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx, + DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType())); + Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy(DAG.getDataLayout())); + + SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + StackPtr); + + // Store the subvector. + Ch = DAG.getStore(Ch, dl, Part, SubStackPtr, + MachinePointerInfo(), false, false, 0); + + // Finally, load the updated vector. + return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, + false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + // We can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(Node); + SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = EltVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, dl, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + // If the destination vector element type is narrower than the source + // element type, only store the bits necessary. + if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) { + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + EltVT, false, false, 0)); + } else + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, + PtrInfo.getWithOffset(Offset), + false, false, 0)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? 
+ StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); +} + +namespace { +/// Keeps track of state when getting the sign of a floating-point value as an +/// integer. +struct FloatSignAsInt { + EVT FloatVT; + SDValue Chain; + SDValue FloatPtr; + SDValue IntPtr; + MachinePointerInfo IntPointerInfo; + MachinePointerInfo FloatPointerInfo; + SDValue IntValue; + APInt SignMask; +}; +} + +/// Bitcast a floating-point value to an integer value. Only bitcast the part +/// containing the sign bit if the target has no integer value capable of +/// holding all bits of the floating-point value. +void SelectionDAGLegalize::getSignAsIntValue(FloatSignAsInt &State, + SDLoc DL, SDValue Value) const { + EVT FloatVT = Value.getValueType(); + unsigned NumBits = FloatVT.getSizeInBits(); + State.FloatVT = FloatVT; + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), NumBits); + // Convert to an integer of the same size. + if (TLI.isTypeLegal(IVT)) { + State.IntValue = DAG.getNode(ISD::BITCAST, DL, IVT, Value); + State.SignMask = APInt::getSignBit(NumBits); + return; + } + + auto &DataLayout = DAG.getDataLayout(); + // Store the float to memory, then load the sign part out as an integer. + MVT LoadTy = TLI.getRegisterType(*DAG.getContext(), MVT::i8); + // First create a temporary that is aligned for both the load and store. + SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy); + int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + // Then store the float to it. + State.FloatPtr = StackPtr; + MachineFunction &MF = DAG.getMachineFunction(); + State.FloatPointerInfo = MachinePointerInfo::getFixedStack(MF, FI); + State.Chain = DAG.getStore(DAG.getEntryNode(), DL, Value, State.FloatPtr, + State.FloatPointerInfo, false, false, 0); + + SDValue IntPtr; + if (DataLayout.isBigEndian()) { + assert(FloatVT.isByteSized() && "Unsupported floating point type!"); + // Load out a legal integer with the same sign bit as the float. + IntPtr = StackPtr; + State.IntPointerInfo = State.FloatPointerInfo; + } else { + // Advance the pointer so that the loaded byte will contain the sign bit. + unsigned ByteOffset = (FloatVT.getSizeInBits() / 8) - 1; + IntPtr = DAG.getNode(ISD::ADD, DL, StackPtr.getValueType(), StackPtr, + DAG.getConstant(ByteOffset, DL, StackPtr.getValueType())); + State.IntPointerInfo = MachinePointerInfo::getFixedStack(MF, FI, + ByteOffset); + } + + State.IntPtr = IntPtr; + State.IntValue = DAG.getExtLoad(ISD::EXTLOAD, DL, LoadTy, State.Chain, + IntPtr, State.IntPointerInfo, MVT::i8, + false, false, false, 0); + State.SignMask = APInt::getOneBitSet(LoadTy.getSizeInBits(), 7); +} + +/// Replace the integer value produced by getSignAsIntValue() with a new value +/// and cast the result back to a floating-point type. +SDValue SelectionDAGLegalize::modifySignAsInt(const FloatSignAsInt &State, + SDLoc DL, SDValue NewIntValue) const { + if (!State.Chain) + return DAG.getNode(ISD::BITCAST, DL, State.FloatVT, NewIntValue); + + // Override the part containing the sign bit in the value stored on the stack. 
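+  // Only the single byte holding the sign bit is rewritten (an i8 truncstore
+  // through State.IntPtr); the full floating-point value is then reloaded
+  // from State.FloatPtr with the updated sign.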
+ SDValue Chain = DAG.getTruncStore(State.Chain, DL, NewIntValue, State.IntPtr, + State.IntPointerInfo, MVT::i8, false, false, + 0); + return DAG.getLoad(State.FloatVT, DL, Chain, State.FloatPtr, + State.FloatPointerInfo, false, false, false, 0); +} + +SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode *Node) const { + SDLoc DL(Node); + SDValue Mag = Node->getOperand(0); + SDValue Sign = Node->getOperand(1); + + // Get sign bit into an integer value. + FloatSignAsInt SignAsInt; + getSignAsIntValue(SignAsInt, DL, Sign); + + EVT IntVT = SignAsInt.IntValue.getValueType(); + SDValue SignMask = DAG.getConstant(SignAsInt.SignMask, DL, IntVT); + SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, SignAsInt.IntValue, + SignMask); + + // If FABS is legal transform FCOPYSIGN(x, y) => sign(x) ? -FABS(x) : FABS(X) + EVT FloatVT = Mag.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FABS, FloatVT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, FloatVT)) { + SDValue AbsValue = DAG.getNode(ISD::FABS, DL, FloatVT, Mag); + SDValue NegValue = DAG.getNode(ISD::FNEG, DL, FloatVT, AbsValue); + SDValue Cond = DAG.getSetCC(DL, getSetCCResultType(IntVT), SignBit, + DAG.getConstant(0, DL, IntVT), ISD::SETNE); + return DAG.getSelect(DL, FloatVT, Cond, NegValue, AbsValue); + } + + // Transform values to integer, copy the sign bit and transform back. + FloatSignAsInt MagAsInt; + getSignAsIntValue(MagAsInt, DL, Mag); + assert(SignAsInt.SignMask == MagAsInt.SignMask); + SDValue ClearSignMask = DAG.getConstant(~SignAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, MagAsInt.IntValue, + ClearSignMask); + SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit); + + return modifySignAsInt(MagAsInt, DL, CopiedSign); +} + +SDValue SelectionDAGLegalize::ExpandFABS(SDNode *Node) const { + SDLoc DL(Node); + SDValue Value = Node->getOperand(0); + + // Transform FABS(x) => FCOPYSIGN(x, 0.0) if FCOPYSIGN is legal. + EVT FloatVT = Value.getValueType(); + if (TLI.isOperationLegalOrCustom(ISD::FCOPYSIGN, FloatVT)) { + SDValue Zero = DAG.getConstantFP(0.0, DL, FloatVT); + return DAG.getNode(ISD::FCOPYSIGN, DL, FloatVT, Value, Zero); + } + + // Transform value to integer, clear the sign bit and transform back. + FloatSignAsInt ValueAsInt; + getSignAsIntValue(ValueAsInt, DL, Value); + EVT IntVT = ValueAsInt.IntValue.getValueType(); + SDValue ClearSignMask = DAG.getConstant(~ValueAsInt.SignMask, DL, IntVT); + SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, ValueAsInt.IntValue, + ClearSignMask); + return modifySignAsInt(ValueAsInt, DL, ClearedSign); +} + +void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, + SmallVectorImpl<SDValue> &Results) { + unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); + assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" + " not tell us which reg is the stack pointer!"); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = SDValue(Node, 0); + SDValue Tmp2 = SDValue(Node, 1); + SDValue Tmp3 = Node->getOperand(2); + SDValue Chain = Tmp1.getOperand(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. 
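+  // In outline, the expansion built below is:
+  //   CALLSEQ_START
+  //   SP   = CopyFromReg(stack pointer)
+  //   Tmp1 = SP - Size   (rounded down to Align if Align > StackAlign)
+  //   CopyToReg(stack pointer, Tmp1)
+  //   CALLSEQ_END
+  // Tmp1 is the allocated address and the CALLSEQ_END token is the out chain.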
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + SDValue Size = Tmp2.getOperand(1); + SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); + Chain = SP.getValue(1); + unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + if (Align > StackAlign) + Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain + + Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + Results.push_back(Tmp1); + Results.push_back(Tmp2); +} + +/// Legalize a SETCC with given LHS and RHS and condition code CC on the current +/// target. +/// +/// If the SETCC has been legalized using AND / OR, then the legalized node +/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert +/// will be set to false. +/// +/// If the SETCC has been legalized by using getSetCCSwappedOperands(), +/// then the values of LHS and RHS will be swapped, CC will be set to the +/// new condition, and NeedInvert will be set to false. +/// +/// If the SETCC has been legalized using the inverse condcode, then LHS and +/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert +/// will be set to true. The caller must invert the result of the SETCC with +/// SelectionDAG::getLogicalNOT() or take equivalent action to swap the effect +/// of a true/false result. +/// +/// \returns true if the SetCC has been legalized, false if it hasn't. +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, + SDValue &LHS, SDValue &RHS, + SDValue &CC, + bool &NeedInvert, + SDLoc dl) { + MVT OpVT = LHS.getSimpleValueType(); + ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); + NeedInvert = false; + switch (TLI.getCondCodeAction(CCCode, OpVT)) { + default: llvm_unreachable("Unknown condition code action!"); + case TargetLowering::Legal: + // Nothing to do. + break; + case TargetLowering::Expand: { + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode); + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + std::swap(LHS, RHS); + CC = DAG.getCondCode(InvCC); + return true; + } + ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID; + unsigned Opc = 0; + switch (CCCode) { + default: llvm_unreachable("Don't know how to expand this condition!"); + case ISD::SETO: + assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT) + == TargetLowering::Legal + && "If SETO is expanded, SETOEQ must be legal!"); + CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break; + case ISD::SETUO: + assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT) + == TargetLowering::Legal + && "If SETUO is expanded, SETUNE must be legal!"); + CC1 = ISD::SETUNE; CC2 = ISD::SETUNE; Opc = ISD::OR; break; + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETUEQ: + case ISD::SETUNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: + // If we are floating point, assign and break, otherwise fall through. + if (!OpVT.isInteger()) { + // We can use the 4th bit to tell if we are the unordered + // or ordered version of the opcode. + CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO; + Opc = ((unsigned)CCCode & 0x8U) ? 
ISD::OR : ISD::AND; + CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10); + break; + } + // Fallthrough if we are unsigned integer. + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETLT: + // We only support using the inverted operation, which is computed above + // and not a different manner of supporting expanding these cases. + llvm_unreachable("Don't know how to expand this condition!"); + case ISD::SETNE: + case ISD::SETEQ: + // Try inverting the result of the inverse condition. + InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ; + if (TLI.isCondCodeLegal(InvCC, OpVT)) { + CC = DAG.getCondCode(InvCC); + NeedInvert = true; + return true; + } + // If inverting the condition didn't work then we have no means to expand + // the condition. + llvm_unreachable("Don't know how to expand this condition!"); + } + + SDValue SetCC1, SetCC2; + if (CCCode != ISD::SETO && CCCode != ISD::SETUO) { + // If we aren't the ordered or unorder operation, + // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS). + SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2); + } else { + // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS) + SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1); + SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2); + } + LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2); + RHS = SDValue(); + CC = SDValue(); + return true; + } + } + return false; +} + +/// Emit a store/load combination to the stack. This stores +/// SrcOp to a stack slot of type SlotVT, truncating it if needed. It then does +/// a load from the stack slot to DestVT, extending it if needed. +/// The resultant code need not be legal. +SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, + EVT SlotVT, + EVT DestVT, + SDLoc dl) { + // Create the stack frame object. + unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment( + SrcOp.getValueType().getTypeForEVT(*DAG.getContext())); + SDValue FIPtr = DAG.CreateStackTemporary(SlotVT, SrcAlign); + + FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr); + int SPFI = StackPtrFI->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); + + unsigned SrcSize = SrcOp.getValueType().getSizeInBits(); + unsigned SlotSize = SlotVT.getSizeInBits(); + unsigned DestSize = DestVT.getSizeInBits(); + Type *DestType = DestVT.getTypeForEVT(*DAG.getContext()); + unsigned DestAlign = DAG.getDataLayout().getPrefTypeAlignment(DestType); + + // Emit a store to the stack slot. Use a truncstore if the input value is + // later than DestVT. + SDValue Store; + + if (SrcSize > SlotSize) + Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, + PtrInfo, SlotVT, false, false, SrcAlign); + else { + assert(SrcSize == SlotSize && "Invalid store"); + Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, + PtrInfo, false, false, SrcAlign); + } + + // Result is a load from the stack slot. + if (SlotSize == DestSize) + return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, + false, false, false, DestAlign); + + assert(SlotSize < DestSize && "Unknown extension!"); + return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, + PtrInfo, SlotVT, false, false, false, DestAlign); +} + +SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { + SDLoc dl(Node); + // Create a vector sized/aligned stack slot, store the value to element #0, + // then load the whole vector back out. 
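+  // The truncstore below writes only the element's bytes at offset 0; the
+  // remaining lanes of the reloaded vector hold whatever was in the slot,
+  // which is fine because SCALAR_TO_VECTOR only defines element 0.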
+ SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0)); + + FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(StackPtr); + int SPFI = StackPtrFI->getIndex(); + + SDValue Ch = DAG.getTruncStore( + DAG.getEntryNode(), dl, Node->getOperand(0), StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), + Node->getValueType(0).getVectorElementType(), false, false, 0); + return DAG.getLoad( + Node->getValueType(0), dl, Ch, StackPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI), false, + false, false, 0); +} + +static bool +ExpandBVWithShuffles(SDNode *Node, SelectionDAG &DAG, + const TargetLowering &TLI, SDValue &Res) { + unsigned NumElems = Node->getNumOperands(); + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + + // Try to group the scalars into pairs, shuffle the pairs together, then + // shuffle the pairs of pairs together, etc. until the vector has + // been built. This will work only if all of the necessary shuffle masks + // are legal. + + // We do this in two phases; first to check the legality of the shuffles, + // and next, assuming that all shuffles are legal, to create the new nodes. + for (int Phase = 0; Phase < 2; ++Phase) { + SmallVector<std::pair<SDValue, SmallVector<int, 16> >, 16> IntermedVals, + NewIntermedVals; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + + SDValue Vec; + if (Phase) + Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, V); + IntermedVals.push_back(std::make_pair(Vec, SmallVector<int, 16>(1, i))); + } + + while (IntermedVals.size() > 2) { + NewIntermedVals.clear(); + for (unsigned i = 0, e = (IntermedVals.size() & ~1u); i < e; i += 2) { + // This vector and the next vector are shuffled together (simply to + // append the one to the other). + SmallVector<int, 16> ShuffleVec(NumElems, -1); + + SmallVector<int, 16> FinalIndices; + FinalIndices.reserve(IntermedVals[i].second.size() + + IntermedVals[i+1].second.size()); + + int k = 0; + for (unsigned j = 0, f = IntermedVals[i].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = j; + FinalIndices.push_back(IntermedVals[i].second[j]); + } + for (unsigned j = 0, f = IntermedVals[i+1].second.size(); j != f; + ++j, ++k) { + ShuffleVec[k] = NumElems + j; + FinalIndices.push_back(IntermedVals[i+1].second[j]); + } + + SDValue Shuffle; + if (Phase) + Shuffle = DAG.getVectorShuffle(VT, dl, IntermedVals[i].first, + IntermedVals[i+1].first, + ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + NewIntermedVals.push_back( + std::make_pair(Shuffle, std::move(FinalIndices))); + } + + // If we had an odd number of defined values, then append the last + // element to the array of new vectors. 
+ if ((IntermedVals.size() & 1) != 0) + NewIntermedVals.push_back(IntermedVals.back()); + + IntermedVals.swap(NewIntermedVals); + } + + assert(IntermedVals.size() <= 2 && IntermedVals.size() > 0 && + "Invalid number of intermediate vectors"); + SDValue Vec1 = IntermedVals[0].first; + SDValue Vec2; + if (IntermedVals.size() > 1) + Vec2 = IntermedVals[1].first; + else if (Phase) + Vec2 = DAG.getUNDEF(VT); + + SmallVector<int, 16> ShuffleVec(NumElems, -1); + for (unsigned i = 0, e = IntermedVals[0].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[0].second[i]] = i; + for (unsigned i = 0, e = IntermedVals[1].second.size(); i != e; ++i) + ShuffleVec[IntermedVals[1].second[i]] = NumElems + i; + + if (Phase) + Res = DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + else if (!TLI.isShuffleMaskLegal(ShuffleVec, VT)) + return false; + } + + return true; +} + +/// Expand a BUILD_VECTOR node on targets that don't +/// support the operation, but do support the resultant vector type. +SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { + unsigned NumElems = Node->getNumOperands(); + SDValue Value1, Value2; + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); + + // If the only non-undef value is the low element, turn this into a + // SCALAR_TO_VECTOR node. If this is { X, X, X, X }, determine X. + bool isOnlyLowElement = true; + bool MoreThanTwoValues = false; + bool isConstant = true; + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) + isConstant = false; + + if (!Value1.getNode()) { + Value1 = V; + } else if (!Value2.getNode()) { + if (V != Value1) + Value2 = V; + } else if (V != Value1 && V != Value2) { + MoreThanTwoValues = true; + } + } + + if (!Value1.getNode()) + return DAG.getUNDEF(VT); + + if (isOnlyLowElement) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Node->getOperand(0)); + + // If all elements are constants, create a load from the constant pool. + if (isConstant) { + SmallVector<Constant*, 16> CV; + for (unsigned i = 0, e = NumElems; i != e; ++i) { + if (ConstantFPSDNode *V = + dyn_cast<ConstantFPSDNode>(Node->getOperand(i))) { + CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue())); + } else if (ConstantSDNode *V = + dyn_cast<ConstantSDNode>(Node->getOperand(i))) { + if (OpVT==EltVT) + CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue())); + else { + // If OpVT and EltVT don't match, EltVT is not legal and the + // element values have been promoted/truncated earlier. Undo this; + // we don't want a v16i8 to become a v16i32 for example. 
+ const ConstantInt *CI = V->getConstantIntValue(); + CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()), + CI->getZExtValue())); + } + } else { + assert(Node->getOperand(i).getOpcode() == ISD::UNDEF); + Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext()); + CV.push_back(UndefValue::get(OpNTy)); + } + } + Constant *CP = ConstantVector::get(CV); + SDValue CPIdx = + DAG.getConstantPool(CP, TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + return DAG.getLoad( + VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); + } + + SmallSet<SDValue, 16> DefinedValues; + for (unsigned i = 0; i < NumElems; ++i) { + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + DefinedValues.insert(Node->getOperand(i)); + } + + if (TLI.shouldExpandBuildVectorWithShuffles(VT, DefinedValues.size())) { + if (!MoreThanTwoValues) { + SmallVector<int, 8> ShuffleVec(NumElems, -1); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue V = Node->getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + ShuffleVec[i] = V == Value1 ? 0 : NumElems; + } + if (TLI.isShuffleMaskLegal(ShuffleVec, Node->getValueType(0))) { + // Get the splatted value into the low element of a vector register. + SDValue Vec1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value1); + SDValue Vec2; + if (Value2.getNode()) + Vec2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value2); + else + Vec2 = DAG.getUNDEF(VT); + + // Return shuffle(LowValVec, undef, <0,0,0,0>) + return DAG.getVectorShuffle(VT, dl, Vec1, Vec2, ShuffleVec.data()); + } + } else { + SDValue Res; + if (ExpandBVWithShuffles(Node, DAG, TLI, Res)) + return Res; + } + } + + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); +} + +// Expand a node into a call to a libcall. If the result value +// does not fit into a register, return the lo part and set the hi part to the +// by-reg argument. If it does fit into a single register, return the result +// and leave the Hi part unset. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + // By default, the input chain to this libcall is the entry node of the + // function. If the libcall is going to be emitted as a tail call then + // TLI.isUsedByReturnOnly will change it to the right chain if the return + // node which is being folded has a non-entry input chain. + SDValue InChain = DAG.getEntryNode(); + + // isTailCall may be true since the callee does not reference caller stack + // frame. Check if it's in the right position. 
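+  // If it is, isInTailCallPosition may also rewrite TCChain to the chain
+  // feeding the return; the call is then emitted as a tail call and
+  // LowerCallTo yields no separate chain result, in which case the DAG root
+  // itself is returned below.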
+ SDValue TCChain = InChain; + bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); + if (isTailCall) + InChain = TCChain; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + if (!CallInfo.second.getNode()) + // It's a tailcall, return the chain (which is the DAG root). + return DAG.getRoot(); + + return CallInfo.first; +} + +/// Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, SDLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo.first; +} + +// Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
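+// Both the call's result value and its output chain are returned, so the
+// caller can thread the new chain back into the DAG.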
+std::pair<SDValue, SDValue> +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = Call_F32; break; + case MVT::f64: LC = Call_F64; break; + case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; + case MVT::ppcf128: LC = Call_PPCF128; break; + } + return ExpandLibCall(LC, Node, false); +} + +SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, + RTLIB::Libcall Call_I8, + RTLIB::Libcall Call_I16, + RTLIB::Libcall Call_I32, + RTLIB::Libcall Call_I64, + RTLIB::Libcall Call_I128) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC = Call_I8; break; + case MVT::i16: LC = Call_I16; break; + case MVT::i32: LC = Call_I32; break; + case MVT::i64: LC = Call_I64; break; + case MVT::i128: LC = Call_I128; break; + } + return ExpandLibCall(LC, Node, isSigned); +} + +/// Issue libcalls to __{u}divmod to compute div / rem pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. 
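+  // The quotient comes back as the call's return value (CallInfo.first); the
+  // remainder is written by the callee through the extra pointer argument
+  // appended below and is loaded back out of that stack temporary afterwards.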
+ SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the return address of the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + SDLoc dl(Node); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + // Remainder is loaded back from the stack frame. + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + MachinePointerInfo(), false, false, false, 0); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + +/// Return true if sincos libcall is available. +static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != nullptr; +} + +/// Return true if sincos libcall is available and can be used to combine sin +/// and cos. +static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. + bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + +/// Only issue sincos libcall if both sin and cos are needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. + if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// Issue libcalls to sincos to compute sin / cos pairs. 
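+/// The call follows the sincos(x, &sin, &cos) convention: both results come
+/// back through stack temporaries passed by pointer and are loaded out again
+/// afterwards.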
+void +SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, + SmallVectorImpl<SDValue> &Results) { + RTLIB::Libcall LC; + switch (Node->getSimpleValueType(0).SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // Pass the argument. + Entry.Node = Node->getOperand(0); + Entry.Ty = RetTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Pass the return address of sin. + SDValue SinPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = SinPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Also pass the return address of the cos. + SDValue CosPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = CosPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + SDLoc dl(Node); + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, + MachinePointerInfo(), false, false, false, 0)); + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, + MachinePointerInfo(), false, false, false, 0)); +} + +/// This function is responsible for legalizing a +/// INT_TO_FP operation of the specified operand when the target requests that +/// we expand it. At this point, we know that the result and operand types are +/// legal for the target. +SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, + SDValue Op0, + EVT DestVT, + SDLoc dl) { + // TODO: Should any fast-math-flags be set for the created nodes? + + if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) { + // simple 32-bit [signed|unsigned] integer to float/double expansion + + // Get the stack frame index of a 8 byte buffer. 
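+    // The arithmetic behind the code below: the word holding the low 32 bits
+    // of the f64 slot receives the (possibly sign-adjusted) input and the
+    // high word receives 0x43300000, so the reloaded double is
+    // 2^52 + input (unsigned) or 2^52 + 2^31 + input (signed, because the
+    // input was XOR'ed with 0x80000000).  Subtracting the matching Bias
+    // constant leaves exactly the original value.  The unsigned i64 -> f64
+    // path further down plays the same trick twice, with biases 2^52 and
+    // 2^84 for the low and high halves.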
+ SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64); + + // word offset constant for Hi/Lo address computation + SDValue WordOff = DAG.getConstant(sizeof(int), dl, + StackSlot.getValueType()); + // set up Hi and Lo (into buffer) address based on endian + SDValue Hi = StackSlot; + SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(), + StackSlot, WordOff); + if (DAG.getDataLayout().isLittleEndian()) + std::swap(Hi, Lo); + + // if signed map to unsigned space + SDValue Op0Mapped; + if (isSigned) { + // constant used to invert sign bit (signed to unsigned mapping) + SDValue SignBit = DAG.getConstant(0x80000000u, dl, MVT::i32); + Op0Mapped = DAG.getNode(ISD::XOR, dl, MVT::i32, Op0, SignBit); + } else { + Op0Mapped = Op0; + } + // store the lo of the constructed double - based on integer input + SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, + Op0Mapped, Lo, MachinePointerInfo(), + false, false, 0); + // initial hi portion of constructed double + SDValue InitialHi = DAG.getConstant(0x43300000u, dl, MVT::i32); + // store the hi of the constructed double - biased exponent + SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi, + MachinePointerInfo(), + false, false, 0); + // load the constructed double + SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, + MachinePointerInfo(), false, false, false, 0); + // FP constant to bias correct the final result + SDValue Bias = DAG.getConstantFP(isSigned ? + BitsToDouble(0x4330000080000000ULL) : + BitsToDouble(0x4330000000000000ULL), + dl, MVT::f64); + // subtract the bias + SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias); + // final result + SDValue Result; + // handle final rounding + if (DestVT == MVT::f64) { + // do nothing + Result = Sub; + } else if (DestVT.bitsLT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub, + DAG.getIntPtrConstant(0, dl)); + } else if (DestVT.bitsGT(MVT::f64)) { + Result = DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub); + } + return Result; + } + assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet"); + // Code below here assumes !isSigned without checking again. + + // Implementation of unsigned i64 to f64 following the algorithm in + // __floatundidf in compiler_rt. This implementation has the advantage + // of performing rounding correctly, both in the default rounding mode + // and in all alternate rounding modes. + // TODO: Generalize this for use with other types. + if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f64) { + SDValue TwoP52 = + DAG.getConstant(UINT64_C(0x4330000000000000), dl, MVT::i64); + SDValue TwoP84PlusTwoP52 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x4530000000100000)), dl, + MVT::f64); + SDValue TwoP84 = + DAG.getConstant(UINT64_C(0x4530000000000000), dl, MVT::i64); + + SDValue Lo = DAG.getZeroExtendInReg(Op0, dl, MVT::i32); + SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, + DAG.getConstant(32, dl, MVT::i64)); + SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52); + SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); + SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr); + SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); + return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); + } + + // Implementation of unsigned i64 to f32. + // TODO: Generalize this for use with other types. 
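+  // Sketch of the fast path below: when the top bit of the input is clear, a
+  // plain SINT_TO_FP already gives the correctly rounded f32.  When it is
+  // set, the input is shifted right by one with its low bit OR'ed back in
+  // (so the dropped bit still participates in rounding), converted as a
+  // signed value, and doubled; a select on the sign test chooses between the
+  // two results.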
+ if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { + // For unsigned conversions, convert them to signed conversions using the + // algorithm from the x86_64 __floatundidf in compiler_rt. + if (!isSigned) { + SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); + + SDValue ShiftConst = DAG.getConstant( + 1, dl, TLI.getShiftAmountTy(Op0.getValueType(), DAG.getDataLayout())); + SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); + SDValue AndConst = DAG.getConstant(1, dl, MVT::i64); + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr); + + SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or); + SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt); + + // TODO: This really should be implemented using a branch rather than a + // select. We happen to get lucky and machinesink does the right + // thing most of the time. This would be a good candidate for a + //pseudo-op, or, even better, for whole-function isel. + SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), + Op0, DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); + return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast); + } + + // Otherwise, implement the fully general conversion. + + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0xfffffffffffff800), dl, MVT::i64)); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, + DAG.getConstant(UINT64_C(0x800), dl, MVT::i64)); + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + DAG.getConstant(UINT64_C(0x7ff), dl, MVT::i64)); + SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), And2, + DAG.getConstant(UINT64_C(0), dl, MVT::i64), + ISD::SETNE); + SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0); + SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64), Op0, + DAG.getConstant(UINT64_C(0x0020000000000000), dl, + MVT::i64), + ISD::SETUGE); + SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType(), DAG.getDataLayout()); + + SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, + DAG.getConstant(32, dl, SHVT)); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sh); + SDValue Fcvt = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Trunc); + SDValue TwoP32 = + DAG.getConstantFP(BitsToDouble(UINT64_C(0x41f0000000000000)), dl, + MVT::f64); + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, MVT::f64, TwoP32, Fcvt); + SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Sel2); + SDValue Fcvt2 = DAG.getNode(ISD::UINT_TO_FP, dl, MVT::f64, Lo); + SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2); + return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd, + DAG.getIntPtrConstant(0, dl)); + } + + SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0); + + SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()), + Op0, + DAG.getConstant(0, dl, Op0.getValueType()), + ISD::SETLT); + SDValue Zero = DAG.getIntPtrConstant(0, dl), + Four = DAG.getIntPtrConstant(4, dl); + SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(), + SignSet, Four, Zero); + + // If the sign bit of the integer is set, the large number will be treated + // as a negative number. To counteract this, the dynamic code adds an + // offset depending on the data type. 
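+  // Concretely: SINT_TO_FP of an unsigned value whose top bit is set yields
+  // value - 2^N, so the 8-byte pool constant built below holds 0.0f in one
+  // half and 2^N (as a float) in the other.  CstOffset selects the 2^N half
+  // exactly when the sign test fires, and the final FADD adds the correction
+  // back in.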
+ uint64_t FF; + switch (Op0.getSimpleValueType().SimpleTy) { + default: llvm_unreachable("Unsupported integer type!"); + case MVT::i8 : FF = 0x43800000ULL; break; // 2^8 (as a float) + case MVT::i16: FF = 0x47800000ULL; break; // 2^16 (as a float) + case MVT::i32: FF = 0x4F800000ULL; break; // 2^32 (as a float) + case MVT::i64: FF = 0x5F800000ULL; break; // 2^64 (as a float) + } + if (DAG.getDataLayout().isLittleEndian()) + FF <<= 32; + Constant *FudgeFactor = ConstantInt::get( + Type::getInt64Ty(*DAG.getContext()), FF); + + SDValue CPIdx = + DAG.getConstantPool(FudgeFactor, TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); + CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset); + Alignment = std::min(Alignment, 4u); + SDValue FudgeInReg; + if (DestVT == MVT::f32) + FudgeInReg = DAG.getLoad( + MVT::f32, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), false, + false, false, Alignment); + else { + SDValue Load = DAG.getExtLoad( + ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); + } + + return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); +} + +/// This function is responsible for legalizing a +/// *INT_TO_FP operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal UINT_TO_FP or SINT_TO_FP +/// operation that takes a larger input. +SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp, + EVT DestVT, + bool isSigned, + SDLoc dl) { + // First step, figure out the appropriate *INT_TO_FP operation to use. + EVT NewInTy = LegalOp.getValueType(); + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. + while (1) { + NewInTy = (MVT::SimpleValueType)(NewInTy.getSimpleVT().SimpleTy+1); + assert(NewInTy.isInteger() && "Ran out of possibilities!"); + + // If the target supports SINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, NewInTy)) { + OpToUse = ISD::SINT_TO_FP; + break; + } + if (isSigned) continue; + + // If the target supports UINT_TO_FP of this type, use it. + if (TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, NewInTy)) { + OpToUse = ISD::UINT_TO_FP; + break; + } + + // Otherwise, try a larger type. + } + + // Okay, we found the operation and type to use. Zero extend our input to the + // desired type then run the operation on it. + return DAG.getNode(OpToUse, dl, DestVT, + DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + dl, NewInTy, LegalOp)); +} + +/// This function is responsible for legalizing a +/// FP_TO_*INT operation of the specified operand when the target requests that +/// we promote it. At this point, we know that the result and operand types are +/// legal for the target, and that there is a legal FP_TO_UINT or FP_TO_SINT +/// operation that returns a larger result. +SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, + EVT DestVT, + bool isSigned, + SDLoc dl) { + // First step, figure out the appropriate FP_TO*INT operation to use. + EVT NewOutTy = DestVT; + + unsigned OpToUse = 0; + + // Scan for the appropriate larger type to use. 
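+  // The loop below walks the simple value types upwards from DestVT; the
+  // assert fires only if we run out of integer types without finding a legal
+  // conversion.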
+ while (1) { + NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); + assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { + OpToUse = ISD::FP_TO_SINT; + break; + } + + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + OpToUse = ISD::FP_TO_UINT; + break; + } + + // Otherwise, try a larger type. + } + + + // Okay, we found the operation and type to use. + SDValue Operation = DAG.getNode(OpToUse, dl, NewOutTy, LegalOp); + + // Truncate the result of the extended FP_TO_*INT operation to the desired + // size. + return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation); +} + +/// Open code the operations for BITREVERSE. +SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Sz = VT.getScalarSizeInBits(); + + SDValue Tmp, Tmp2; + Tmp = DAG.getConstant(0, dl, VT); + for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) { + if (I < J) + Tmp2 = + DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT)); + else + Tmp2 = + DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT)); + + APInt Shift(Sz, 1); + Shift = Shift.shl(J); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT)); + Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2); + } + + return Tmp; +} + +/// Open code the operations for BSWAP of the specified operation. +SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) { + EVT VT = Op.getValueType(); + EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unhandled Expand type in BSWAP!"); + case MVT::i16: + Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + case MVT::i32: + Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(0xFF0000, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT)); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + case MVT::i64: + Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT)); + Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT)); + Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT)); + Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT)); + Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7, + DAG.getConstant(255ULL<<48, dl, 
VT)); + Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6, + DAG.getConstant(255ULL<<40, dl, VT)); + Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5, + DAG.getConstant(255ULL<<32, dl, VT)); + Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4, + DAG.getConstant(255ULL<<24, dl, VT)); + Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3, + DAG.getConstant(255ULL<<16, dl, VT)); + Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, + DAG.getConstant(255ULL<<8 , dl, VT)); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7); + Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3); + Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1); + Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6); + Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2); + return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4); + } +} + +/// Expand the specified bitcount instruction into operations. +SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, + SDLoc dl) { + switch (Opc) { + default: llvm_unreachable("Cannot expand this yet!"); + case ISD::CTPOP: { + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned Len = VT.getSizeInBits(); + + assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && + "CTPOP not implemented for this type."); + + // This is the "best" algorithm from + // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel + + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), + dl, VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), + dl, VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), + dl, VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), + dl, VT); + + // v = v - ((v >> 1) & 0x55555555...) + Op = DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(1, dl, ShVT)), + Mask55)); + // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...) + Op = DAG.getNode(ISD::ADD, dl, VT, + DAG.getNode(ISD::AND, dl, VT, Op, Mask33), + DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(2, dl, ShVT)), + Mask33)); + // v = (v + (v >> 4)) & 0x0F0F0F0F... + Op = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ADD, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, + DAG.getConstant(4, dl, ShVT))), + Mask0F); + // v = (v * 0x01010101...) >> (Len - 8) + Op = DAG.getNode(ISD::SRL, dl, VT, + DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), + DAG.getConstant(Len - 8, dl, ShVT)); + + return Op; + } + case ISD::CTLZ_ZERO_UNDEF: + // This trivially expands to CTLZ. + return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op); + case ISD::CTLZ: { + // for now, we do this: + // x = x | (x >> 1); + // x = x | (x >> 2); + // ... + // x = x | (x >>16); + // x = x | (x >>32); // for 64-bit input + // return popcount(~x); + // + // Ref: "Hacker's Delight" by Henry Warren + EVT VT = Op.getValueType(); + EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + unsigned len = VT.getSizeInBits(); + for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { + SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT); + Op = DAG.getNode(ISD::OR, dl, VT, Op, + DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3)); + } + Op = DAG.getNOT(dl, Op, VT); + return DAG.getNode(ISD::CTPOP, dl, VT, Op); + } + case ISD::CTTZ_ZERO_UNDEF: + // This trivially expands to CTTZ. 
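+    // The two only differ for a zero input, where the _ZERO_UNDEF form may
+    // return anything, so the stricter CTTZ is always a valid replacement.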
+ return DAG.getNode(ISD::CTTZ, dl, Op.getValueType(), Op); + case ISD::CTTZ: { + // for now, we use: { return popcount(~x & (x - 1)); } + // unless the target has ctlz but not ctpop, in which case we use: + // { return 32 - nlz(~x & (x-1)); } + // Ref: "Hacker's Delight" by Henry Warren + EVT VT = Op.getValueType(); + SDValue Tmp3 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNOT(dl, Op, VT), + DAG.getNode(ISD::SUB, dl, VT, Op, + DAG.getConstant(1, dl, VT))); + // If ISD::CTLZ is legal and CTPOP isn't, then do that instead. + if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) && + TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) + return DAG.getNode(ISD::SUB, dl, VT, + DAG.getConstant(VT.getSizeInBits(), dl, VT), + DAG.getNode(ISD::CTLZ, dl, VT, Tmp3)); + return DAG.getNode(ISD::CTPOP, dl, VT, Tmp3); + } + } +} + +bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + bool NeedInvert; + switch (Node->getOpcode()) { + case ISD::CTPOP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + Tmp1 = ExpandBitCount(Node->getOpcode(), Node->getOperand(0), dl); + Results.push_back(Tmp1); + break; + case ISD::BITREVERSE: + Results.push_back(ExpandBITREVERSE(Node->getOperand(0), dl)); + break; + case ISD::BSWAP: + Results.push_back(ExpandBSWAP(Node->getOperand(0), dl)); + break; + case ISD::FRAMEADDR: + case ISD::RETURNADDR: + case ISD::FRAME_TO_ARGS_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + break; + case ISD::FLT_ROUNDS_: + Results.push_back(DAG.getConstant(1, dl, Node->getValueType(0))); + break; + case ISD::EH_RETURN: + case ISD::EH_LABEL: + case ISD::PREFETCH: + case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + // If the target didn't expand these, there's nothing to do, so just + // preserve the chain and be done. + Results.push_back(Node->getOperand(0)); + break; + case ISD::READCYCLECOUNTER: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.append(Node->getNumValues() - 1, + DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + // If the target didn't expand this, just return 'zero' and preserve the + // chain. + Results.push_back(DAG.getConstant(0, dl, MVT::i32)); + Results.push_back(Node->getOperand(0)); + break; + case ISD::ATOMIC_LOAD: { + // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. + SDValue Zero = DAG.getConstant(0, dl, Node->getValueType(0)); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(0)); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_STORE: { + // There is no libcall for atomic store; fake it with ATOMIC_SWAP. 
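+    // ATOMIC_SWAP stores the new value and returns the old one; only its
+    // chain result is needed here, so the loaded value is simply dropped.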
+ SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(Node)->getMemoryVT(), + Node->getOperand(0), + Node->getOperand(1), Node->getOperand(2), + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + Results.push_back(Swap.getValue(1)); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getSuccessOrdering(), + cast<AtomicSDNode>(Node)->getFailureOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + + SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), + Res, Node->getOperand(2), ISD::SETEQ); + + Results.push_back(Res.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } + case ISD::DYNAMIC_STACKALLOC: + ExpandDYNAMIC_STACKALLOC(Node, Results); + break; + case ISD::MERGE_VALUES: + for (unsigned i = 0; i < Node->getNumValues(); i++) + Results.push_back(Node->getOperand(i)); + break; + case ISD::UNDEF: { + EVT VT = Node->getValueType(0); + if (VT.isInteger()) + Results.push_back(DAG.getConstant(0, dl, VT)); + else { + assert(VT.isFloatingPoint() && "Unknown value type!"); + Results.push_back(DAG.getConstantFP(0, dl, VT)); + } + break; + } + case ISD::FP_ROUND: + case ISD::BITCAST: + Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_EXTEND: + Tmp1 = EmitStackConvert(Node->getOperand(0), + Node->getOperand(0).getValueType(), + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::SIGN_EXTEND_INREG: { + // NOTE: we could fall back on load/store here too for targets without + // SAR. However, it is doubtful that any exist. + EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + EVT VT = Node->getValueType(0); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + if (VT.isVector()) + ShiftAmountTy = VT; + unsigned BitsDiff = VT.getScalarType().getSizeInBits() - + ExtraVT.getScalarType().getSizeInBits(); + SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy); + Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0), + Node->getOperand(0), ShiftCst); + Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst); + Results.push_back(Tmp1); + break; + } + case ISD::FP_ROUND_INREG: { + // The only way we can lower this is to turn it into a TRUNCSTORE, + // EXTLOAD pair, targeting a temporary location (a stack slot). + + // NOTE: there is a choice here between constantly creating new stack + // slots and always reusing the same one. We currently always create + // new ones, as reuse may inhibit scheduling. 
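+    // EmitStackConvert does the rounding through memory: it stores the
+    // operand into the slot as ExtraVT (a truncating store) and reloads it
+    // as the result type (an extending load).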
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); + Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT, + Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + } + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP, + Node->getOperand(0), Node->getValueType(0), dl); + Results.push_back(Tmp1); + break; + case ISD::FP_TO_SINT: + if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) + Results.push_back(Tmp1); + break; + case ISD::FP_TO_UINT: { + SDValue True, False; + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + APFloat apf(DAG.EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); + APInt x = APInt::getSignBit(NVT.getSizeInBits()); + (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); + Tmp1 = DAG.getConstantFP(apf, dl, VT); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT), + Node->getOperand(0), + Tmp1, ISD::SETLT); + True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0)); + // TODO: Should any fast-math-flags be set for the FSUB? + False = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, + DAG.getNode(ISD::FSUB, dl, VT, + Node->getOperand(0), Tmp1)); + False = DAG.getNode(ISD::XOR, dl, NVT, False, + DAG.getConstant(x, dl, NVT)); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False); + Results.push_back(Tmp1); + break; + } + case ISD::VAARG: + Results.push_back(DAG.expandVAArg(Node)); + Results.push_back(Results[0].getValue(1)); + break; + case ISD::VACOPY: + Results.push_back(DAG.expandVACopy(Node)); + break; + case ISD::EXTRACT_VECTOR_ELT: + if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) + // This must be an access of the only element. Return it. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), + Node->getOperand(0)); + else + Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); + Results.push_back(Tmp1); + break; + case ISD::EXTRACT_SUBVECTOR: + Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::INSERT_SUBVECTOR: + Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0))); + break; + case ISD::CONCAT_VECTORS: { + Results.push_back(ExpandVectorBuildThroughStack(Node)); + break; + } + case ISD::SCALAR_TO_VECTOR: + Results.push_back(ExpandSCALAR_TO_VECTOR(Node)); + break; + case ISD::INSERT_VECTOR_ELT: + Results.push_back(ExpandINSERT_VECTOR_ELT(Node->getOperand(0), + Node->getOperand(1), + Node->getOperand(2), dl)); + break; + case ISD::VECTOR_SHUFFLE: { + SmallVector<int, 32> NewMask; + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); + + EVT VT = Node->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + if (!TLI.isTypeLegal(EltVT)) { + + EVT NewEltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT); + + // BUILD_VECTOR operands are allowed to be wider than the element type. + // But if NewEltVT is smaller that EltVT the BUILD_VECTOR does not accept + // it. + if (NewEltVT.bitsLT(EltVT)) { + + // Convert shuffle node. + // If original node was v4i64 and the new EltVT is i32, + // cast operands to v8i32 and re-build the mask. + + // Calculate new VT, the size of the new VT should be equal to original. 
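+        // For the v4i64 -> v8i32 example the factor is 2 and each mask entry
+        // m expands to <2m, 2m+1>, so a mask of <0, 3, 4, 7> is rebuilt as
+        // <0, 1, 6, 7, 8, 9, 14, 15>; negative (undef) entries are simply
+        // replicated.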
+ EVT NewVT = + EVT::getVectorVT(*DAG.getContext(), NewEltVT, + VT.getSizeInBits() / NewEltVT.getSizeInBits()); + assert(NewVT.bitsEq(VT)); + + // cast operands to new VT + Op0 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op0); + Op1 = DAG.getNode(ISD::BITCAST, dl, NewVT, Op1); + + // Convert the shuffle mask + unsigned int factor = + NewVT.getVectorNumElements()/VT.getVectorNumElements(); + + // EltVT gets smaller + assert(factor > 0); + + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { + if (Mask[i] < 0) { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]); + } + else { + for (unsigned fi = 0; fi < factor; ++fi) + NewMask.push_back(Mask[i]*factor+fi); + } + } + Mask = NewMask; + VT = NewVT; + } + EltVT = NewEltVT; + } + unsigned NumElems = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops; + for (unsigned i = 0; i != NumElems; ++i) { + if (Mask[i] < 0) { + Ops.push_back(DAG.getUNDEF(EltVT)); + continue; + } + unsigned Idx = Mask[i]; + if (Idx < NumElems) + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + else + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1, + DAG.getConstant(Idx - NumElems, dl, + TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + + Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + // We may have changed the BUILD_VECTOR type. Cast it back to the Node type. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::EXTRACT_ELEMENT: { + EVT OpTy = Node->getOperand(0).getValueType(); + if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) { + // 1 -> Hi + Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), + DAG.getConstant(OpTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy( + Node->getOperand(0).getValueType(), + DAG.getDataLayout()))); + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); + } else { + // 0 -> Lo + Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), + Node->getOperand(0)); + } + Results.push_back(Tmp1); + break; + } + case ISD::STACKSAVE: + // Expand to CopyFromReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, SP, + Node->getValueType(0))); + Results.push_back(Results[0].getValue(1)); + } else { + Results.push_back(DAG.getUNDEF(Node->getValueType(0))); + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::STACKRESTORE: + // Expand to CopyToReg if the target set + // StackPointerRegisterToSaveRestore. + if (unsigned SP = TLI.getStackPointerRegisterToSaveRestore()) { + Results.push_back(DAG.getCopyToReg(Node->getOperand(0), dl, SP, + Node->getOperand(1))); + } else { + Results.push_back(Node->getOperand(0)); + } + break; + case ISD::GET_DYNAMIC_AREA_OFFSET: + Results.push_back(DAG.getConstant(0, dl, Node->getValueType(0))); + Results.push_back(Results[0].getValue(0)); + break; + case ISD::FCOPYSIGN: + Results.push_back(ExpandFCOPYSIGN(Node)); + break; + case ISD::FNEG: + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + Tmp1 = DAG.getConstantFP(-0.0, dl, Node->getValueType(0)); + // TODO: If FNEG has fast-math-flags, propagate them to the FSUB. 
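+    // Subtracting from -0.0 rather than +0.0 gets signed zeros right:
+    // -0.0 - (+0.0) is -0.0 and -0.0 - (-0.0) is +0.0, whereas 0.0 - 0.0
+    // would produce +0.0 where FNEG must produce -0.0.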
+ Tmp1 = DAG.getNode(ISD::FSUB, dl, Node->getValueType(0), Tmp1, + Node->getOperand(0)); + Results.push_back(Tmp1); + break; + case ISD::FABS: + Results.push_back(ExpandFABS(Node)); + break; + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B + ISD::CondCode Pred; + switch (Node->getOpcode()) { + default: llvm_unreachable("How did we get here?"); + case ISD::SMAX: Pred = ISD::SETGT; break; + case ISD::SMIN: Pred = ISD::SETLT; break; + case ISD::UMAX: Pred = ISD::SETUGT; break; + case ISD::UMIN: Pred = ISD::SETULT; break; + } + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp1, Tmp2, Pred); + Results.push_back(Tmp1); + break; + } + + case ISD::FSIN: + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. + if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + canCombineSinCosLibcall(Node, TLI, TM)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (Node->getOpcode() == ISD::FCOS) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } + break; + } + case ISD::FMAD: + llvm_unreachable("Illegal fmad should never be formed"); + + case ISD::FP16_TO_FP: + if (Node->getValueType(0) != MVT::f32) { + // We can extend to types bigger than f32 in two steps without changing + // the result. Since "f16 -> f32" is much more commonly available, give + // CodeGen the option of emitting that before resorting to a libcall. + SDValue Res = + DAG.getNode(ISD::FP16_TO_FP, dl, MVT::f32, Node->getOperand(0)); + Results.push_back( + DAG.getNode(ISD::FP_EXTEND, dl, Node->getValueType(0), Res)); + } + break; + case ISD::FP_TO_FP16: + if (!TLI.useSoftFloat() && TM.Options.UnsafeFPMath) { + SDValue Op = Node->getOperand(0); + MVT SVT = Op.getSimpleValueType(); + if ((SVT == MVT::f64 || SVT == MVT::f80) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_FP16, MVT::f32)) { + // Under fastmath, we can expand this node into a fround followed by + // a float-half conversion. + SDValue FloatVal = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Op, + DAG.getIntPtrConstant(0, dl)); + Results.push_back( + DAG.getNode(ISD::FP_TO_FP16, dl, MVT::i16, FloatVal)); + } + } + break; + case ISD::ConstantFP: { + ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node); + // Check to see if this FP immediate is already legal. + // If this is a legal constant, turn it into a TargetConstantFP node. 
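+    // Otherwise ExpandConstantFP materializes it another way, typically as a
+    // load from the constant pool (or as an integer constant plus a bitcast).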
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); + break; + } + case ISD::Constant: { + ConstantSDNode *CP = cast<ConstantSDNode>(Node); + Results.push_back(ExpandConstant(CP)); + break; + } + case ISD::FSUB: { + EVT VT = Node->getValueType(0); + if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && + TLI.isOperationLegalOrCustom(ISD::FNEG, VT)) { + const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(Node)->Flags; + Tmp1 = DAG.getNode(ISD::FNEG, dl, VT, Node->getOperand(1)); + Tmp1 = DAG.getNode(ISD::FADD, dl, VT, Node->getOperand(0), Tmp1, Flags); + Results.push_back(Tmp1); + } + break; + } + case ISD::SUB: { + EVT VT = Node->getValueType(0); + assert(TLI.isOperationLegalOrCustom(ISD::ADD, VT) && + TLI.isOperationLegalOrCustom(ISD::XOR, VT) && + "Don't know how to expand this subtraction!"); + Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Node->getOperand(1), + DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), dl, + VT)); + Tmp1 = DAG.getNode(ISD::ADD, dl, VT, Tmp1, DAG.getConstant(1, dl, VT)); + Results.push_back(DAG.getNode(ISD::ADD, dl, VT, Node->getOperand(0), Tmp1)); + break; + } + case ISD::UREM: + case ISD::SREM: { + EVT VT = Node->getValueType(0); + bool isSigned = Node->getOpcode() == ISD::SREM; + unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + Tmp2 = Node->getOperand(0); + Tmp3 = Node->getOperand(1); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); + Results.push_back(Tmp1); + } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { + // X % Y -> X-X/Y*Y + Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); + Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); + Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); + Results.push_back(Tmp1); + } + break; + } + case ISD::UDIV: + case ISD::SDIV: { + bool isSigned = Node->getOpcode() == ISD::SDIV; + unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; + EVT VT = Node->getValueType(0); + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1); + } + break; + } + case ISD::MULHU: + case ISD::MULHS: { + unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : + ISD::SMUL_LOHI; + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) && + "If this wasn't legal, it shouldn't have been created!"); + Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0), + Node->getOperand(1)); + Results.push_back(Tmp1.getValue(1)); + break; + } + case ISD::MUL: { + EVT VT = Node->getValueType(0); + SDVTList VTs = DAG.getVTList(VT, VT); + // See if multiply or divide can be lowered using two-result operations. + // We just need the low half of the multiply; try both the signed + // and unsigned forms. If the target supports both SMUL_LOHI and + // UMUL_LOHI, form a preference by checking which forms of plain + // MULH it supports. 
+ bool HasSMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::SMUL_LOHI, VT); + bool HasUMUL_LOHI = TLI.isOperationLegalOrCustom(ISD::UMUL_LOHI, VT); + bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, VT); + bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, VT); + unsigned OpToUse = 0; + if (HasSMUL_LOHI && !HasMULHS) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI && !HasMULHU) { + OpToUse = ISD::UMUL_LOHI; + } else if (HasSMUL_LOHI) { + OpToUse = ISD::SMUL_LOHI; + } else if (HasUMUL_LOHI) { + OpToUse = ISD::UMUL_LOHI; + } + if (OpToUse) { + Results.push_back(DAG.getNode(OpToUse, dl, VTs, Node->getOperand(0), + Node->getOperand(1))); + break; + } + + SDValue Lo, Hi; + EVT HalfType = VT.getHalfSizedIntegerVT(*DAG.getContext()); + if (TLI.isOperationLegalOrCustom(ISD::ZERO_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) && + TLI.isOperationLegalOrCustom(ISD::SHL, VT) && + TLI.isOperationLegalOrCustom(ISD::OR, VT) && + TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) { + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi); + SDValue Shift = + DAG.getConstant(HalfType.getSizeInBits(), dl, + TLI.getShiftAmountTy(HalfType, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift); + Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi)); + } + break; + } + case ISD::SADDO: + case ISD::SSUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); + + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); + break; + } + case ISD::UADDO: + case ISD::USUBO: { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + Results.push_back(Sum); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? 
ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); + break; + } + case ISD::UMULO: + case ISD::SMULO: { + EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDValue BottomHalf; + SDValue TopHalf; + static const unsigned Ops[2][3] = + { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND }, + { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }}; + bool isSigned = Node->getOpcode() == ISD::SMULO; + if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) { + BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS); + TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS); + } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) { + BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS, + RHS); + TopHalf = BottomHalf.getValue(1); + } else if (TLI.isTypeLegal(WideVT)) { + LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); + RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(0, dl)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1, + DAG.getIntPtrConstant(1, dl)); + } else { + // We can fall back to a libcall with an illegal type for the MUL if we + // have a libcall big enough. + // Also, we can fall back to a division in some cases, but that's a big + // performance hit in the general case. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (WideVT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (WideVT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (WideVT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (WideVT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); + + // The high part is obtained by SRA'ing all but one of the bits of low + // part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = + DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + SDValue HiRHS = + DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments. + SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0, dl)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1, dl)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, make sure the + // node has no more uses. The above EXTRACT_ELEMENT nodes should have been + // folded. 
+ assert(Ret->use_empty() && + "Unexpected uses of illegally type from expanded lib call."); + } + + if (isSigned) { + Tmp1 = DAG.getConstant( + VT.getSizeInBits() - 1, dl, + TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout())); + Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1, + ISD::SETNE); + } else { + TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, + DAG.getConstant(0, dl, VT), ISD::SETNE); + } + Results.push_back(BottomHalf); + Results.push_back(TopHalf); + break; + } + case ISD::BUILD_PAIR: { + EVT PairTy = Node->getValueType(0); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); + Tmp2 = DAG.getNode( + ISD::SHL, dl, PairTy, Tmp2, + DAG.getConstant(PairTy.getSizeInBits() / 2, dl, + TLI.getShiftAmountTy(PairTy, DAG.getDataLayout()))); + Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); + break; + } + case ISD::SELECT: + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + if (Tmp1.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getSelectCC(dl, Tmp1.getOperand(0), Tmp1.getOperand(1), + Tmp2, Tmp3, + cast<CondCodeSDNode>(Tmp1.getOperand(2))->get()); + } else { + Tmp1 = DAG.getSelectCC(dl, Tmp1, + DAG.getConstant(0, dl, Tmp1.getValueType()), + Tmp2, Tmp3, ISD::SETNE); + } + Results.push_back(Tmp1); + break; + case ISD::BR_JT: { + SDValue Chain = Node->getOperand(0); + SDValue Table = Node->getOperand(1); + SDValue Index = Node->getOperand(2); + + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + + const DataLayout &TD = DAG.getDataLayout(); + unsigned EntrySize = + DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EntrySize, dl, Index.getValueType())); + SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(), + Index, Table); + + EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); + SDValue LD = DAG.getExtLoad( + ISD::SEXTLOAD, dl, PTy, Chain, Addr, + MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT, + false, false, false, 0); + Addr = LD; + if (TM.getRelocationModel() == Reloc::PIC_) { + // For PIC, the sequence is: + // BRIND(load(Jumptable + index) + RelocBase) + // RelocBase can be JumpTable, GOT or some sort of global base. + Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, + TLI.getPICJumpTableRelocBase(Table, DAG)); + } + Tmp1 = DAG.getNode(ISD::BRIND, dl, MVT::Other, LD.getValue(1), Addr); + Results.push_back(Tmp1); + break; + } + case ISD::BRCOND: + // Expand brcond's setcc into its constituent parts and create a BR_CC + // Node. + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + if (Tmp2.getOpcode() == ISD::SETCC) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, + Tmp1, Tmp2.getOperand(2), + Tmp2.getOperand(0), Tmp2.getOperand(1), + Node->getOperand(2)); + } else { + // We test only the i1 bit. Skip the AND if UNDEF. + Tmp3 = (Tmp2.getOpcode() == ISD::UNDEF) ? 
Tmp2 : + DAG.getNode(ISD::AND, dl, Tmp2.getValueType(), Tmp2, + DAG.getConstant(1, dl, Tmp2.getValueType())); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, MVT::Other, Tmp1, + DAG.getCondCode(ISD::SETNE), Tmp3, + DAG.getConstant(0, dl, Tmp3.getValueType()), + Node->getOperand(2)); + } + Results.push_back(Tmp1); + break; + case ISD::SETCC: { + Tmp1 = Node->getOperand(0); + Tmp2 = Node->getOperand(1); + Tmp3 = Node->getOperand(2); + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, + Tmp3, NeedInvert, dl); + + if (Legalized) { + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SETCC node. + if (Tmp3.getNode()) + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp1 = DAG.getLogicalNOT(dl, Tmp1, Tmp1->getValueType(0)); + + Results.push_back(Tmp1); + break; + } + + // Otherwise, SETCC for the given comparison type must be completely + // illegal; expand it into a SELECT_CC. + EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, + DAG.getConstant(TrueValue, dl, VT), + DAG.getConstant(0, dl, VT), + Tmp3); + Results.push_back(Tmp1); + break; + } + case ISD::SELECT_CC: { + Tmp1 = Node->getOperand(0); // LHS + Tmp2 = Node->getOperand(1); // RHS + Tmp3 = Node->getOperand(2); // True + Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); + SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + + // SELECT_CC is legal, so the condition code must not be. + bool Legalized = false; + // Try to legalize by inverting the condition. This is for targets that + // might support an ordered version of a condition, but not the unordered + // version (or vice versa). + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, + Tmp1.getValueType().isInteger()); + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { + // Use the new condition code and swap true and false + Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC); + } else { + // If The inverse is not legal, then try to swap the arguments using + // the inverse condition code. + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC); + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) { + // The swapped inverse condition is legal, so swap true and false, + // lhs and rhs. 
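+        // e.g. for integer operands, select_cc(a, b, t, f, SETLT) becomes
+        // select_cc(b, a, f, t, SETLE): b <= a is the negation of a < b, and
+        // exchanging the true/false values compensates for it.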
+ Legalized = true; + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC); + } + } + + if (!Legalized) { + Legalized = LegalizeSetCCCondCode( + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert, + dl); + + assert(Legalized && "Can't legalize SELECT_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then swap + // the True/False operands to match. + if (NeedInvert) + std::swap(Tmp3, Tmp4); + + // If we expanded the SETCC by swapping LHS and RHS, or by inverting the + // condition code, create a new SELECT_CC node. + if (CC.getNode()) { + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), + Tmp1, Tmp2, Tmp3, Tmp4, CC); + } else { + Tmp2 = DAG.getConstant(0, dl, Tmp1.getValueType()); + CC = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, + Tmp2, Tmp3, Tmp4, CC); + } + } + Results.push_back(Tmp1); + break; + } + case ISD::BR_CC: { + Tmp1 = Node->getOperand(0); // Chain + Tmp2 = Node->getOperand(2); // LHS + Tmp3 = Node->getOperand(3); // RHS + Tmp4 = Node->getOperand(1); // CC + + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType( + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl); + (void)Legalized; + assert(Legalized && "Can't legalize BR_CC with legal condition!"); + + // If we expanded the SETCC by inverting the condition code, then wrap + // the existing SETCC in a NOT to restore the intended condition. + if (NeedInvert) + Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0)); + + // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC + // node. + if (Tmp4.getNode()) { + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, + Tmp4, Tmp2, Tmp3, Node->getOperand(4)); + } else { + Tmp3 = DAG.getConstant(0, dl, Tmp2.getValueType()); + Tmp4 = DAG.getCondCode(ISD::SETNE); + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, + Tmp2, Tmp3, Node->getOperand(4)); + } + Results.push_back(Tmp1); + break; + } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. + EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector<SDValue, 8> Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(0), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Sh = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT.getScalarType(), Node->getOperand(1), + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Scalars); + ReplaceNode(SDValue(Node, 0), Result); + break; + } + case ISD::GLOBAL_OFFSET_TABLE: + case ISD::GlobalAddress: + case ISD::GlobalTLSAddress: + case ISD::ExternalSymbol: + case ISD::ConstantPool: + case ISD::JumpTable: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + // FIXME: Custom lowering for these operations shouldn't return null! + break; + } + + // Replace the original node with the legalized result. 
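+  // An empty Results vector means nothing above handled the node; returning
+  // false lets the caller fall back to other lowering, typically a libcall
+  // via ConvertNodeToLibcall.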
+ if (Results.empty()) + return false; + + ReplaceNode(Node, Results.data()); + return true; +} + +void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) { + SmallVector<SDValue, 8> Results; + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + unsigned Opc = Node->getOpcode(); + switch (Opc) { + case ISD::ATOMIC_FENCE: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + // FIXME: handle "fence singlethread" more efficiently. + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } + case ISD::TRAP: { + // If this operation is not supported, lower it to 'abort()' call + TargetLowering::ArgListTy Args; + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl) + .setChain(Node->getOperand(0)) + .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), + DAG.getExternalSymbol("abort", + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); + + Results.push_back(CallResult.second); + break; + } + case ISD::FMINNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128)); + break; + case ISD::FMAXNUM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128)); + break; + case ISD::FSQRT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); + break; + case ISD::FSIN: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + break; + case ISD::FCOS: + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + break; + case ISD::FSINCOS: + // Expand into sincos libcall. 
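+    // A single sincos libcall produces both values; ExpandSinCosLibCall
+    // passes stack slots for the two results and loads them back as this
+    // node's results.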
+ ExpandSinCosLibCall(Node, Results); + break; + case ISD::FLOG: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); + break; + case ISD::FLOG2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); + break; + case ISD::FLOG10: + Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); + break; + case ISD::FEXP: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); + break; + case ISD::FEXP2: + Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); + break; + case ISD::FTRUNC: + Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); + break; + case ISD::FFLOOR: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); + break; + case ISD::FCEIL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); + break; + case ISD::FRINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); + break; + case ISD::FNEARBYINT: + Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128)); + break; + case ISD::FROUND: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128)); + break; + case ISD::FPOWI: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); + break; + case ISD::FPOW: + Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); + break; + case ISD::FDIV: + Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); + break; + case ISD::FREM: + Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); + break; + case ISD::FMA: + Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); + break; + case ISD::FADD: + Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128)); + break; + case ISD::FMUL: + Results.push_back(ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64, + RTLIB::MUL_F80, RTLIB::MUL_F128, + RTLIB::MUL_PPCF128)); + break; + case ISD::FP16_TO_FP: + if (Node->getValueType(0) == MVT::f32) { + Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); + } + break; + case ISD::FP_TO_FP16: { + RTLIB::Libcall LC = + RTLIB::getFPROUND(Node->getOperand(0).getValueType(), MVT::f16); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unable to expand fp_to_fp16"); + Results.push_back(ExpandLibCall(LC, Node, false)); + break; + } + case ISD::FSUB: + Results.push_back(ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64, + 
RTLIB::SUB_F80, RTLIB::SUB_F128, + RTLIB::SUB_PPCF128)); + break; + case ISD::SREM: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SREM_I8, + RTLIB::SREM_I16, RTLIB::SREM_I32, + RTLIB::SREM_I64, RTLIB::SREM_I128)); + break; + case ISD::UREM: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UREM_I8, + RTLIB::UREM_I16, RTLIB::UREM_I32, + RTLIB::UREM_I64, RTLIB::UREM_I128)); + break; + case ISD::SDIV: + Results.push_back(ExpandIntLibCall(Node, true, + RTLIB::SDIV_I8, + RTLIB::SDIV_I16, RTLIB::SDIV_I32, + RTLIB::SDIV_I64, RTLIB::SDIV_I128)); + break; + case ISD::UDIV: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::UDIV_I8, + RTLIB::UDIV_I16, RTLIB::UDIV_I32, + RTLIB::UDIV_I64, RTLIB::UDIV_I128)); + break; + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; + case ISD::MUL: + Results.push_back(ExpandIntLibCall(Node, false, + RTLIB::MUL_I8, + RTLIB::MUL_I16, RTLIB::MUL_I32, + RTLIB::MUL_I64, RTLIB::MUL_I128)); + break; + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); +} + +// Determine the vector type to use in place of an original scalar element when +// promoting equally sized vectors. +static MVT getPromotedVectorElementType(const TargetLowering &TLI, + MVT EltVT, MVT NewEltVT) { + unsigned OldEltsPerNewElt = EltVT.getSizeInBits() / NewEltVT.getSizeInBits(); + MVT MidVT = MVT::getVectorVT(NewEltVT, OldEltsPerNewElt); + assert(TLI.isTypeLegal(MidVT) && "unexpected"); + return MidVT; +} + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector<SDValue, 8> Results; + MVT OVT = Node->getSimpleValueType(0); + if (Node->getOpcode() == ISD::UINT_TO_FP || + Node->getOpcode() == ISD::SINT_TO_FP || + Node->getOpcode() == ISD::SETCC || + Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || + Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { + OVT = Node->getOperand(0).getSimpleValueType(); + } + if (Node->getOpcode() == ISD::BR_CC) + OVT = Node->getOperand(2).getSimpleValueType(); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + SDLoc dl(Node); + SDValue Tmp1, Tmp2, Tmp3; + switch (Node->getOpcode()) { + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + // Zero extend the argument. + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + // Perform the larger operation. For CTPOP and CTTZ_ZERO_UNDEF, this is + // already the correct result. + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + if (Node->getOpcode() == ISD::CTTZ) { + // FIXME: This should set a bit in the zero extended value instead. 
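+      // If the promoted CTTZ scanned all the way through the zero-extended
+      // high bits (i.e. the original value was zero), it returns the promoted
+      // bit width; clamp that back to the original bit width.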
+ Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), + Tmp1, DAG.getConstant(NVT.getSizeInBits(), dl, NVT), + ISD::SETEQ); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), dl, NVT), Tmp1); + } else if (Node->getOpcode() == ISD::CTLZ || + Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { + // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) + Tmp1 = DAG.getNode(ISD::SUB, dl, NVT, Tmp1, + DAG.getConstant(NVT.getSizeInBits() - + OVT.getSizeInBits(), dl, NVT)); + } + Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1)); + break; + case ISD::BSWAP: { + unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); + Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); + Tmp1 = DAG.getNode( + ISD::SRL, dl, NVT, Tmp1, + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); + Results.push_back(Tmp1); + break; + } + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + Tmp1 = PromoteLegalFP_TO_INT(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::FP_TO_SINT, dl); + Results.push_back(Tmp1); + break; + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + Tmp1 = PromoteLegalINT_TO_FP(Node->getOperand(0), Node->getValueType(0), + Node->getOpcode() == ISD::SINT_TO_FP, dl); + Results.push_back(Tmp1); + break; + case ISD::VAARG: { + SDValue Chain = Node->getOperand(0); // Get the chain. + SDValue Ptr = Node->getOperand(1); // Get the pointer. + + unsigned TruncOp; + if (OVT.isVector()) { + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() + && "VAARG promotion is supported only for vectors or integer types"); + TruncOp = ISD::TRUNCATE; + } + + // Perform the larger operation, then convert back + Tmp1 = DAG.getVAArg(NVT, dl, Chain, Ptr, Node->getOperand(2), + Node->getConstantOperandVal(3)); + Chain = Tmp1.getValue(1); + + Tmp2 = DAG.getNode(TruncOp, dl, OVT, Tmp1); + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Chain); + if (UpdatedNodes) { + UpdatedNodes->insert(Tmp2.getNode()); + UpdatedNodes->insert(Chain.getNode()); + } + ReplacedNode(Node); + break; + } + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + unsigned ExtOp, TruncOp; + if (OVT.isVector()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else { + assert(OVT.isInteger() && "Cannot promote logic operation"); + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } + // Promote each of the values to the new type. + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + // Perform the larger operation, then convert back + Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1)); + break; + } + case ISD::SELECT: { + unsigned ExtOp, TruncOp; + if (Node->getValueType(0).isVector() || + Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) { + ExtOp = ISD::BITCAST; + TruncOp = ISD::BITCAST; + } else if (Node->getValueType(0).isInteger()) { + ExtOp = ISD::ANY_EXTEND; + TruncOp = ISD::TRUNCATE; + } else { + ExtOp = ISD::FP_EXTEND; + TruncOp = ISD::FP_ROUND; + } + Tmp1 = Node->getOperand(0); + // Promote each of the values to the new type. + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + // Perform the larger operation, then round down. 
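+    // The condition (Tmp1) keeps its original type; only the two selected
+    // values are widened before the select and narrowed again afterwards.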
+ Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); + if (TruncOp != ISD::FP_ROUND) + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); + else + Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1, + DAG.getIntPtrConstant(0, dl)); + Results.push_back(Tmp1); + break; + } + case ISD::VECTOR_SHUFFLE: { + ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Node)->getMask(); + + // Cast the two input vectors. + Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1)); + + // Convert the shuffle mask to the right # elements. + Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1); + Results.push_back(Tmp1); + break; + } + case ISD::SETCC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(2))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); + Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), + Tmp1, Tmp2, Node->getOperand(2))); + break; + } + case ISD::BR_CC: { + unsigned ExtOp = ISD::FP_EXTEND; + if (NVT.isInteger()) { + ISD::CondCode CCCode = + cast<CondCodeSDNode>(Node->getOperand(1))->get(); + ExtOp = isSignedIntSetCC(CCCode) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + } + Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); + Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(3)); + Results.push_back(DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), + Node->getOperand(0), Node->getOperand(1), + Tmp1, Tmp2, Node->getOperand(4))); + break; + } + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FPOW: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, + Node->getFlags()); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FMA: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(1)); + Tmp3 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(2)); + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, + DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3), + DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::FCOPYSIGN: + case ISD::FPOWI: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = Node->getOperand(1); + Tmp3 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2); + + // fcopysign doesn't change anything but the sign bit, so + // (fp_round (fcopysign (fpext a), b)) + // is as precise as + // (fp_round (fpext a)) + // which is a no-op. Mark it as a TRUNCating FP_ROUND. 
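+ // FPOWI, in contrast, can genuinely change the value, so it keeps the
+ // conservative (non-truncating) flag.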
+ const bool isTrunc = (Node->getOpcode() == ISD::FCOPYSIGN); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp3, DAG.getIntPtrConstant(isTrunc, dl))); + break; + } + case ISD::FFLOOR: + case ISD::FCEIL: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FTRUNC: + case ISD::FNEG: + case ISD::FSQRT: + case ISD::FSIN: + case ISD::FCOS: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FABS: + case ISD::FEXP: + case ISD::FEXP2: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1); + Results.push_back(DAG.getNode(ISD::FP_ROUND, dl, OVT, + Tmp2, DAG.getIntPtrConstant(0, dl))); + break; + } + case ISD::BUILD_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. v2i64 = build_vector i64:x, i64:y => v4i32 + // => + // v4i32 = concat_vectors (v2i32 (bitcast i64:x)), (v2i32 (bitcast i64:y)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for build_vector"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) { + SDValue Op = Node->getOperand(I); + NewOps.push_back(DAG.getNode(ISD::BITCAST, SDLoc(Op), MidVT, Op)); + } + + SDLoc SL(Node); + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewOps); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + case ISD::EXTRACT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = extract_vector_elt x:v2i64, y:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // + // i64 = bitcast + // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))), + // (i32 (extract_vector_elt castx, (2 * y + 1))) + // + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for extract_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Idx = Node->getOperand(1); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SL, IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + + SmallVector<SDValue, 8> NewOps; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue TmpIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVec, TmpIdx); + NewOps.push_back(Elt); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, SL, MidVT, NewOps); + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, EltVT, NewVec)); + break; + } + case ISD::INSERT_VECTOR_ELT: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size + // + // e.g. 
v2i64 = insert_vector_elt x:v2i64, y:i64, z:i32 + // => + // v4i32:castx = bitcast x:v2i64 + // v2i32:casty = bitcast y:i64 + // + // v2i64 = bitcast + // (v4i32 insert_vector_elt + // (v4i32 insert_vector_elt v4i32:castx, + // (extract_vector_elt casty, 0), 2 * z), + // (extract_vector_elt casty, 1), (2 * z + 1)) + + assert(NVT.isVector() && OVT.getSizeInBits() == NVT.getSizeInBits() && + "Invalid promote type for insert_vector_elt"); + assert(NewEltVT.bitsLT(EltVT) && "not handled"); + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + unsigned NewEltsPerOldElt = MidVT.getVectorNumElements(); + + SDValue Val = Node->getOperand(1); + SDValue Idx = Node->getOperand(2); + EVT IdxVT = Idx.getValueType(); + SDLoc SL(Node); + + SDValue Factor = DAG.getConstant(NewEltsPerOldElt, SDLoc(), IdxVT); + SDValue NewBaseIdx = DAG.getNode(ISD::MUL, SL, IdxVT, Idx, Factor); + + SDValue CastVec = DAG.getNode(ISD::BITCAST, SL, NVT, Node->getOperand(0)); + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + + SDValue NewVec = CastVec; + for (unsigned I = 0; I < NewEltsPerOldElt; ++I) { + SDValue IdxOffset = DAG.getConstant(I, SL, IdxVT); + SDValue InEltIdx = DAG.getNode(ISD::ADD, SL, IdxVT, NewBaseIdx, IdxOffset); + + SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, NewEltVT, + CastVal, IdxOffset); + + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, SL, NVT, + NewVec, Elt, InEltIdx); + } + + Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewVec)); + break; + } + case ISD::SCALAR_TO_VECTOR: { + MVT EltVT = OVT.getVectorElementType(); + MVT NewEltVT = NVT.getVectorElementType(); + + // Handle bitcasts to a different vector type with the same total bit size. + // + // e.g. v2i64 = scalar_to_vector x:i64 + // => + // concat_vectors (v2i32 bitcast x:i64), (v2i32 undef) + // + + MVT MidVT = getPromotedVectorElementType(TLI, EltVT, NewEltVT); + SDValue Val = Node->getOperand(0); + SDLoc SL(Node); + + SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, MidVT, Val); + SDValue Undef = DAG.getUNDEF(MidVT); + + SmallVector<SDValue, 8> NewElts; + NewElts.push_back(CastVal); + for (unsigned I = 1, NElts = OVT.getVectorNumElements(); I != NElts; ++I) + NewElts.push_back(Undef); + + SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SL, NVT, NewElts); + SDValue CvtVec = DAG.getNode(ISD::BITCAST, SL, OVT, Concat); + Results.push_back(CvtVec); + break; + } + } + + // Replace the original node with the legalized result. + if (!Results.empty()) + ReplaceNode(Node, Results.data()); +} + +/// This is the entry point for the file. +void SelectionDAG::Legalize() { + AssignTopologicalOrder(); + + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes); + + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. + for (;;) { + bool AnyLegalized = false; + for (auto NI = allnodes_end(); NI != allnodes_begin();) { + --NI; + + SDNode *N = &*NI; + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + continue; + } + + if (LegalizedNodes.insert(N).second) { + AnyLegalized = true; + Legalizer.LegalizeOp(N); + + if (N->use_empty() && N != getRoot().getNode()) { + ++NI; + DeleteNode(N); + } + } + } + if (!AnyLegalized) + break; + + } + + // Remove dead nodes now. 
+ RemoveDeadNodes(); +} + +bool SelectionDAG::LegalizeOp(SDNode *N, + SmallSetVector<SDNode *, 16> &UpdatedNodes) { + SmallPtrSet<SDNode *, 16> LegalizedNodes; + SelectionDAGLegalize Legalizer(*this, LegalizedNodes, &UpdatedNodes); + + // Directly insert the node in question, and legalize it. This will recurse + // as needed through operands. + LegalizedNodes.insert(N); + Legalizer.LegalizeOp(N); + + return LegalizedNodes.count(N); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp new file mode 100644 index 0000000..6c0193a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -0,0 +1,2114 @@ +//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements float type expansion and softening for LegalizeTypes. +// Softening is the act of turning a computation in an illegal floating point +// type into a computation in an integer type of the same size; also known as +// "soft float". For example, turning f32 arithmetic into operations using i32. +// The resulting integer value is the same as what you would get by performing +// the floating point operation and bitcasting the result to the integer type. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. For example, +// implementing ppcf128 arithmetic in two f64 registers. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +/// GetFPLibCall - Return the right libcall for the given floating point type. +static RTLIB::Libcall GetFPLibCall(EVT VT, + RTLIB::Libcall Call_F32, + RTLIB::Libcall Call_F64, + RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, + RTLIB::Libcall Call_PPCF128) { + return + VT == MVT::f32 ? Call_F32 : + VT == MVT::f64 ? Call_F64 : + VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : + VT == MVT::ppcf128 ? Call_PPCF128 : + RTLIB::UNKNOWN_LIBCALL; +} + +//===----------------------------------------------------------------------===// +// Convert Float Results to Integer for Non-HW-supported Operations. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SoftenFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften the result of this operator!"); + + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. 
+ R = SDValue(N, ResNo); + break; + case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; + case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; + case ISD::EXTRACT_VECTOR_ELT: + R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; + case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; + case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; + case ISD::FADD: R = SoftenFloatRes_FADD(N); break; + case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; + case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; + case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; + case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; + case ISD::FEXP2: R = SoftenFloatRes_FEXP2(N); break; + case ISD::FFLOOR: R = SoftenFloatRes_FFLOOR(N); break; + case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break; + case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break; + case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break; + case ISD::FMA: R = SoftenFloatRes_FMA(N); break; + case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; + case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; + case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; + case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; + case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; + case ISD::FPOW: R = SoftenFloatRes_FPOW(N); break; + case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; + case ISD::FREM: R = SoftenFloatRes_FREM(N); break; + case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; + case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; + case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; + case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; + case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; + case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break; + } + + // If R is null, the sub-method took care of registering the result. + if (R.getNode()) { + SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + return BitConvertToInteger(N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return BitConvertToInteger(Op); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { + // Convert the inputs to integers, and build a new pair out of them. 
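+ // e.g. a ppcf128 built from two f64 halves becomes an i128 built from
+ // the corresponding bitcast i64 halves.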
+ return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), + BitConvertToInteger(N->getOperand(0)), + BitConvertToInteger(N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), + TLI.getTypeToTransformTo(*DAG.getContext(), + CN->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + NewOp.getValueType().getVectorElementType(), + NewOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned Size = NVT.getSizeInBits(); + + // Mask = ~(1 << (Size-1)) + APInt API = APInt::getAllOnesValue(Size); + API.clearBit(Size - 1); + SDValue Mask = DAG.getConstant(API, SDLoc(N), NVT); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, + RTLIB::FMIN_F64, + RTLIB::FMIN_F80, + RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, + RTLIB::FMAX_F64, + RTLIB::FMAX_F80, + RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. 
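+ // In the softened form the sign is transferred with integer masks; for
+ // two f32 operands the result is (x & 0x7fffffff) | (y & 0x80000000)
+ // computed on i32 values, which is what the code below builds for
+ // arbitrary operand widths.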
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + SDValue LHS = GetSoftenedFloat(N->getOperand(0)); + SDValue RHS = BitConvertToInteger(N->getOperand(1)); + SDLoc dl(N); + + EVT LVT = LHS.getValueType(); + EVT RVT = RHS.getValueType(); + + unsigned LSize = LVT.getSizeInBits(); + unsigned RSize = RVT.getSizeInBits(); + + // First get the sign bit of second operand. + SDValue SignBit = DAG.getNode( + ISD::SHL, dl, RVT, DAG.getConstant(1, dl, RVT), + DAG.getConstant(RSize - 1, dl, + TLI.getShiftAmountTy(RVT, DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); + + // Shift right or sign-extend it if the two operands have different types. + int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); + if (SizeDiff > 0) { + SignBit = + DAG.getNode(ISD::SRL, dl, RVT, SignBit, + DAG.getConstant(SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); + } else if (SizeDiff < 0) { + SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); + SignBit = + DAG.getNode(ISD::SHL, dl, LVT, SignBit, + DAG.getConstant(-SizeDiff, dl, + TLI.getShiftAmountTy(SignBit.getValueType(), + DAG.getDataLayout()))); + } + + // Clear the sign bit of the first operand. + SDValue Mask = DAG.getNode( + ISD::SHL, dl, LVT, DAG.getConstant(1, dl, LVT), + DAG.getConstant(LSize - 1, dl, + TLI.getShiftAmountTy(LVT, DAG.getDataLayout()))); + Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, dl, LVT)); + LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); + + // Or the value with the sign bit. + return DAG.getNode(ISD::OR, dl, LVT, LHS, SignBit); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + 
RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)), + GetSoftenedFloat(N->getOperand(2)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
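+ // (Negation only flips the sign bit, e.g. an XOR with 0x80000000 for an
+ // f32 value viewed as i32.)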
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + // Expand Y = FNEG(X) -> Y = SUB -0.0, X + SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)), + GetSoftenedFloat(N->getOperand(0)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, false, dl).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + + // There's only a libcall for f16 -> f32, so proceed in two stages. Also, it's + // entirely possible for both f16 and f32 to be legal, so use the fully + // hard-float FP_EXTEND rather than FP16_TO_FP. + if (Op.getValueType() == MVT::f16 && N->getValueType(0) != MVT::f32) { + Op = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), MVT::f32, Op); + if (getTypeAction(MVT::f32) == TargetLowering::TypeSoftenFloat) + SoftenFloatResult(Op.getNode(), 0); + } + + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) { + Op = GetPromotedFloat(Op); + // If the promotion did the FP_EXTEND to the destination type for us, + // there's nothing left to do here. + if (Op.getValueType() == N->getValueType(0)) { + return BitConvertToInteger(Op); + } + } + + RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypeSoftenFloat) + Op = GetSoftenedFloat(Op); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; +} + +// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special +// nodes? +SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { + EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); + SDValue Op = N->getOperand(0); + SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, + false, SDLoc(N)).first; + if (N->getValueType(0) == MVT::f32) + return Res32; + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); + return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = N->getOperand(0); + if (N->getValueType(0) == MVT::f16) { + // Semi-soften first, to FP_TO_FP16, so that targets which support f16 as a + // storage-only type get a chance to select things. 
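+ // FP_TO_FP16 already produces the i16 bit pattern of the half value,
+ // which is exactly the softened representation of an f16 result, so no
+ // further conversion is needed.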
+ return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, Op); + } + + RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); + return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::i32 && + "Unsupported power type!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), + GetSoftenedFloat(N->getOperand(1)) }; + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + 
RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + if (N->getValueType(0) == MVT::f16) + return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0)); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + NVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + SDValue NewL; + if (L->getExtensionType() == ISD::NON_EXTLOAD) { + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + return NewL; + } + + // Do a non-extending load followed by FP_EXTEND. + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), + L->getBasePtr(), L->getOffset(), L->getPointerInfo(), + L->getMemoryVT(), L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalized the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + SDValue LHS = GetSoftenedFloat(N->getOperand(1)); + SDValue RHS = GetSoftenedFloat(N->getOperand(2)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + SDValue LHS = GetSoftenedFloat(N->getOperand(2)); + SDValue RHS = GetSoftenedFloat(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + SDValue NewVAARG; + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + + // Legalized the chain result - switch anything that used the old chain to + // use the new one. 
+ if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + return NewVAARG; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { + bool Signed = N->getOpcode() == ISD::SINT_TO_FP; + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); + SDLoc dl(N); + + // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to + // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly + // match. Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned t = MVT::FIRST_INTEGER_VALUETYPE; + t <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; ++t) { + NVT = (MVT::SimpleValueType)t; + // The source needs to be big enough to hold the operand. + if (NVT.bitsGE(SVT)) + LC = Signed ? RTLIB::getSINTTOFP(NVT, RVT):RTLIB::getUINTTOFP (NVT, RVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + // Sign/zero extend the argument if the libcall takes a larger type. + SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + NVT, N->getOperand(0)); + return TLI.makeLibCall(DAG, LC, + TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + Op, Signed, dl).first; +} + + +//===----------------------------------------------------------------------===// +// Convert Float Operand to Integer for Non-HW-supported Operations. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Soften float operand " << OpNo << ": "; N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + switch (N->getOpcode()) { + default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; +#ifndef NDEBUG + dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to soften this operator's operand!"); + + case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break; + case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break; + case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break; + case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes + case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break; + case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this to re-analyze. 
+ if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), + GetSoftenedFloat(N->getOperand(0))); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { + // If we get here, the result must be legal but the source illegal. + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + + if (SVT == MVT::f16) + return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), RVT, Op); + + RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); + + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; +} + + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { + // We actually deal with the partially-softened FP_TO_FP16 node too, which + // returns an i16 so doesn't meet the constraints necessary for FP_ROUND. + assert(N->getOpcode() == ISD::FP_ROUND || N->getOpcode() == ISD::FP_TO_FP16); + + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT FloatRVT = N->getOpcode() == ISD::FP_TO_FP16 ? MVT::f16 : RVT; + + RTLIB::Libcall LC = RTLIB::getFPROUND(SVT, FloatRVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { + bool Signed = N->getOpcode() == ISD::FP_TO_SINT; + EVT SVT = N->getOperand(0).getValueType(); + EVT RVT = N->getValueType(0); + EVT NVT = EVT(); + SDLoc dl(N); + + // If the result is not legal, eg: fp -> i1, then it needs to be promoted to + // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly + // match, eg. we don't have fp -> i8 conversions. + // Look for an appropriate libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { + NVT = (MVT::SimpleValueType)IntVT; + // The type needs to be big enough to hold the result. + if (NVT.bitsGE(RVT)) + LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT):RTLIB::getFPTOUINT(SVT, NVT); + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); + + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first; + + // Truncate the result if the libcall returns a larger type. + return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If softenSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If softenSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); +} + +SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only soften the stored value!"); + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc dl(N); + + if (ST->isTruncatingStore()) + // Do an FP_ROUND followed by a non-truncating store. 
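+ // e.g. a truncating store of an f64 value into an f32 slot becomes:
+ // round to f32, bitcast to i32, then an ordinary store of the resulting
+ // i32.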
+ Val = BitConvertToInteger(DAG.getNode(ISD::FP_ROUND, dl, ST->getMemoryVT(), + Val, DAG.getIntPtrConstant(0, dl))); + else + Val = GetSoftenedFloat(Val); + + return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), + ST->getMemOperand()); +} + + +//===----------------------------------------------------------------------===// +// Float Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Expand float result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand the result of this operator!"); + + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + + case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break; + case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break; + case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break; + case ISD::FMAXNUM: ExpandFloatRes_FMAXNUM(N, Lo, Hi); break; + case ISD::FADD: ExpandFloatRes_FADD(N, Lo, Hi); break; + case ISD::FCEIL: ExpandFloatRes_FCEIL(N, Lo, Hi); break; + case ISD::FCOPYSIGN: ExpandFloatRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::FCOS: ExpandFloatRes_FCOS(N, Lo, Hi); break; + case ISD::FDIV: ExpandFloatRes_FDIV(N, Lo, Hi); break; + case ISD::FEXP: ExpandFloatRes_FEXP(N, Lo, Hi); break; + case ISD::FEXP2: ExpandFloatRes_FEXP2(N, Lo, Hi); break; + case ISD::FFLOOR: ExpandFloatRes_FFLOOR(N, Lo, Hi); break; + case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break; + case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break; + case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break; + case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break; + case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break; + case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break; + case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break; + case ISD::FP_EXTEND: ExpandFloatRes_FP_EXTEND(N, Lo, Hi); break; + case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; + case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; + case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; + case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; + case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; + case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; + case ISD::FTRUNC: ExpandFloatRes_FTRUNC(N, Lo, Hi); break; + case 
ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedFloat(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + assert(NVT.getSizeInBits() == integerPartWidth && + "Do not know how to expand this float constant!"); + APInt C = cast<ConstantFPSDNode>(N)->getValueAPF().bitcastToAPInt(); + SDLoc dl(N); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[1])), + dl, NVT); + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[0])), + dl, NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDLoc dl(N); + SDValue Tmp; + GetExpandedFloat(N->getOperand(0), Lo, Tmp); + Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); + // Lo = Hi==fabs(Hi) ? Lo : -Lo; + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, + DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), + ISD::SETEQ); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMINNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMIN_F32, RTLIB::FMIN_F64, + RTLIB::FMIN_F80, RTLIB::FMIN_F128, + RTLIB::FMIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMAXNUM(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FMAX_F32, RTLIB::FMAX_F64, + RTLIB::FMAX_F80, RTLIB::FMAX_F128, + RTLIB::FMAX_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, RTLIB::ADD_F64, + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, RTLIB::CEIL_F64, + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COPYSIGN_F32, + RTLIB::COPYSIGN_F64, + RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, + RTLIB::COPYSIGN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 
false, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, RTLIB::EXP_F64, + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, RTLIB::EXP2_F64, + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, RTLIB::LOG_F64, + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, RTLIB::LOG2_F64, + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, false, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, false, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + GetExpandedFloat(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + Hi = DAG.getNode(ISD::FP_EXTEND, dl, NVT, N->getOperand(0)); + Lo = 
DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, RTLIB::POW_F64, + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, RTLIB::POWI_F64, + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, RTLIB::RINT_F64, + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, RTLIB::SQRT_F64, + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, false, + SDLoc(N)).first; + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, + SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + LoadSDNode *LD = cast<LoadSDNode>(N); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDLoc dl(N); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + + Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + LD->getMemoryVT(), 
LD->getMemOperand()); + + // Remember the chain. + Chain = Hi.getValue(1); + + // The low part is zero. + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); + + // Modified the chain - switch anything that used the old chain to use the + // new one. + ReplaceValueWith(SDValue(LD, 1), Chain); +} + +void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(N->getValueType(0) == MVT::ppcf128 && "Unsupported XINT_TO_FP!"); + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; + SDLoc dl(N); + + // First do an SINT_TO_FP, whether the original was signed or unsigned. + // When promoting partial word types to i32 we must honor the signedness, + // though. + if (SrcVT.bitsLE(MVT::i32)) { + // The integer can be represented exactly in an f64. + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i32, Src); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), dl, NVT); + Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); + } else { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (SrcVT.bitsLE(MVT::i64)) { + Src = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, + MVT::i64, Src); + LC = RTLIB::SINTTOFP_I64_PPCF128; + } else if (SrcVT.bitsLE(MVT::i128)) { + Src = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i128, Src); + LC = RTLIB::SINTTOFP_I128_PPCF128; + } + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); + + Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first; + GetPairElements(Hi, Lo, Hi); + } + + if (isSigned) + return; + + // Unsigned - fix up the SINT_TO_FP value just calculated. + Hi = DAG.getNode(ISD::BUILD_PAIR, dl, VT, Lo, Hi); + SrcVT = Src.getValueType(); + + // x>=0 ? (ppcf128)(iN)x : (ppcf128)(iN)x + 2^N; N=32,64,128. + static const uint64_t TwoE32[] = { 0x41f0000000000000LL, 0 }; + static const uint64_t TwoE64[] = { 0x43f0000000000000LL, 0 }; + static const uint64_t TwoE128[] = { 0x47f0000000000000LL, 0 }; + ArrayRef<uint64_t> Parts; + + switch (SrcVT.getSimpleVT().SimpleTy) { + default: + llvm_unreachable("Unsupported UINT_TO_FP!"); + case MVT::i32: + Parts = TwoE32; + break; + case MVT::i64: + Parts = TwoE64; + break; + case MVT::i128: + Parts = TwoE128; + break; + } + + // TODO: Are there fast-math-flags to propagate to this FADD? + Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + APInt(128, Parts)), + dl, MVT::ppcf128)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT), + Lo, Hi, ISD::SETLT); + GetPairElements(Lo, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Float Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandFloatOperand - This method is called when the specified operand of the +/// specified node is found to need expansion. At this point, all of the result +/// types of the node are known to be legal, but other operands of the node may +/// need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand float operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandFloatOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + + case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; + case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; + case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break; + case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// FloatExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + SDLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedFloat(NewLHS, LHSLo, LHSHi); + GetExpandedFloat(NewRHS, RHSLo, RHSHi); + + assert(NewLHS.getValueType() == MVT::ppcf128 && "Unsupported setcc type!"); + + // FIXME: This generated code sucks. We want to generate + // FCMPU crN, hi1, hi2 + // BNE crN, L: + // FCMPU crN, lo1, lo2 + // The following can be improved, but not that much. + SDValue Tmp1, Tmp2, Tmp3; + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETOEQ); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, CCCode); + Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETUNE); + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode); + Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); + NewRHS = SDValue(); // LHS is the result, not a compare. +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. 
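+  // Illustrative note (not from the original source): after this expansion a
+  // branch such as
+  //   br_cc setolt, ppcf128 %a, ppcf128 %b, %dest
+  // ends up testing the scalar produced by FloatExpandSetCCOperands against
+  // zero, i.e. it becomes a br_cc with SETNE on (%cmp, 0).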
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(0), Lo, Hi); + // Round it the rest of the way (e.g. to f32) if needed. + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0), Hi, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { + EVT RVT = N->getValueType(0); + SDLoc dl(N); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Res = DAG.getNode(ISD::FP_ROUND_INREG, dl, MVT::ppcf128, + N->getOperand(0), DAG.getValueType(MVT::f64)); + Res = DAG.getNode(ISD::FP_ROUND, dl, MVT::f64, Res, + DAG.getIntPtrConstant(1, dl)); + return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res); + } + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); + return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { + EVT RVT = N->getValueType(0); + SDLoc dl(N); + + // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on + // PPC (the libcall is not available). FIXME: Do this in a less hacky way. + if (RVT == MVT::i32) { + assert(N->getOperand(0).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; + APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); + SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128); + // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X + // FIXME: generated code sucks. + // TODO: Are there fast-math-flags to propagate to this FSUB? 
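+    // Worked example (illustrative, not from the original source): for
+    // X = 3e9 we have X >= 2^31, so the expansion computes
+    //   (int)(3e9 - 2^31) + 0x80000000 = 0x32D05E00 + 0x80000000 = 0xB2D05E00,
+    // which is 3000000000 read as an unsigned 32-bit value; for X < 2^31 the
+    // plain FP_TO_SINT path is taken.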
+ return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, dl, + MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); + } + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0), + false, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + StoreSDNode *ST = cast<StoreSDNode>(N); + + SDValue Chain = ST->getChain(); + SDValue Ptr = ST->getBasePtr(); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), + ST->getValue().getValueType()); + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + assert(ST->getMemoryVT().bitsLE(NVT) && "Float type not round?"); + (void)NVT; + + SDValue Lo, Hi; + GetExpandedOp(ST->getValue(), Lo, Hi); + + return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); +} + +//===----------------------------------------------------------------------===// +// Float Operand Promotion +//===----------------------------------------------------------------------===// +// + +static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) { + return ISD::FP16_TO_FP; + } else if (RetVT == MVT::f16) { + return ISD::FP_TO_FP16; + } + + report_fatal_error("Attempt at an invalid promotion-related conversion"); +} + +bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) { + SDValue R = SDValue(); + + // Nodes that use a promotion-requiring floating point operand, but doesn't + // produce a promotion-requiring floating point result, need to be legalized + // to use the promoted float operand. 
Nodes that produce at least one + // promotion-requiring floating point result have their operands legalized as + // a part of PromoteFloatResult. + switch (N->getOpcode()) { + default: + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break; + case ISD::FCOPYSIGN: R = PromoteFloatOp_FCOPYSIGN(N, OpNo); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: R = PromoteFloatOp_FP_TO_XINT(N, OpNo); break; + case ISD::FP_EXTEND: R = PromoteFloatOp_FP_EXTEND(N, OpNo); break; + case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break; + case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break; + } + + if (R.getNode()) + ReplaceValueWith(SDValue(N, 0), R); + return false; +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo) { + SDValue Op = N->getOperand(0); + EVT OpVT = Op->getValueType(0); + + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits()); + assert (IVT == N->getValueType(0) && "Bitcast to type of different size"); + + SDValue Promoted = GetPromotedFloat(N->getOperand(0)); + EVT PromotedVT = Promoted->getValueType(0); + + // Convert the promoted float value to the desired IVT. + return DAG.getNode(GetPromotionOpcode(PromotedVT, OpVT), SDLoc(N), IVT, + Promoted); +} + +// Promote Operand 1 of FCOPYSIGN. Operand 0 ought to be handled by +// PromoteFloatRes_FCOPYSIGN. +SDValue DAGTypeLegalizer::PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo) { + assert (OpNo == 1 && "Only Operand 1 must need promotion here"); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), + N->getOperand(0), Op1); +} + +// Convert the promoted float value to the desired integer type +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo) { + SDValue Op = GetPromotedFloat(N->getOperand(0)); + EVT VT = N->getValueType(0); + + // Desired VT is same as promoted type. Use promoted float directly. + if (VT == Op->getValueType(0)) + return Op; + + // Else, extend the promoted float value to the desired VT. + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Op); +} + +// Promote the float operands used for comparison. The true- and false- +// operands have the same type as the result and are promoted, if needed, by +// PromoteFloatRes_SELECT_CC +SDValue DAGTypeLegalizer::PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo) { + SDValue LHS = GetPromotedFloat(N->getOperand(0)); + SDValue RHS = GetPromotedFloat(N->getOperand(1)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + LHS, RHS, N->getOperand(2), N->getOperand(3), + N->getOperand(4)); +} + +// Construct a SETCC that compares the promoted values and sets the conditional +// code. 
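+// Illustrative example (assuming f16 is the type being promoted and f32 is
+// its promotion target; not taken from the original source):
+//   setcc f16 %a, f16 %b, setolt
+// is rebuilt as
+//   setcc (fp16_to_fp %a), (fp16_to_fp %b), setolt
+// on the promoted f32 operands.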
+SDValue DAGTypeLegalizer::PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + + return DAG.getSetCC(SDLoc(N), NVT, Op0, Op1, CCCode); + +} + +// Lower the promoted Float down to the integer value of same size and construct +// a STORE of the integer value. +SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) { + StoreSDNode *ST = cast<StoreSDNode>(N); + SDValue Val = ST->getValue(); + SDLoc DL(N); + + SDValue Promoted = GetPromotedFloat(Val); + EVT VT = ST->getOperand(1)->getValueType(0); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + SDValue NewVal; + NewVal = DAG.getNode(GetPromotionOpcode(Promoted.getValueType(), VT), DL, + IVT, Promoted); + + return DAG.getStore(ST->getChain(), DL, NewVal, ST->getBasePtr(), + ST->getMemOperand()); +} + +//===----------------------------------------------------------------------===// +// Float Result Promotion +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) { + SDValue R = SDValue(); + + switch (N->getOpcode()) { + // These opcodes cannot appear if promotion of FP16 is done in the backend + // instead of Clang + case ISD::FP16_TO_FP: + case ISD::FP_TO_FP16: + default: + llvm_unreachable("Do not know how to promote this operator's result!"); + + case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break; + case ISD::ConstantFP: R = PromoteFloatRes_ConstantFP(N); break; + case ISD::EXTRACT_VECTOR_ELT: + R = PromoteFloatRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::FCOPYSIGN: R = PromoteFloatRes_FCOPYSIGN(N); break; + + // Unary FP Operations + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: R = PromoteFloatRes_UnaryOp(N); break; + + // Binary FP Operations + case ISD::FADD: + case ISD::FDIV: + case ISD::FMAXNUM: + case ISD::FMINNUM: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: R = PromoteFloatRes_BinOp(N); break; + + case ISD::FMA: // FMA is same as FMAD + case ISD::FMAD: R = PromoteFloatRes_FMAD(N); break; + + case ISD::FPOWI: R = PromoteFloatRes_FPOWI(N); break; + + case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; + case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; + case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break; + case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break; + + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break; + case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break; + + } + + if (R.getNode()) + SetPromotedFloat(SDValue(N, ResNo), R); +} + +// Bitcast from i16 to f16: convert the i16 to a f32 value instead. +// At this point, it is not possible to determine if the bitcast value is +// eventually stored to memory or promoted to f32 or promoted to a floating +// point at a higher precision. Some of these cases are handled by FP_EXTEND, +// STORE promotion handlers. 
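+// Illustrative example (assuming f16 promotes to f32; not from the original
+// source):
+//   f16 = bitcast i16 %x
+// is legalized as
+//   f32 = fp16_to_fp i16 %x
+// and users of the f16 value are later given the promoted f32 value instead.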
+SDValue DAGTypeLegalizer::PromoteFloatRes_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, + N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_ConstantFP(SDNode *N) { + ConstantFPSDNode *CFPNode = cast<ConstantFPSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc DL(N); + + // Get the (bit-cast) APInt of the APFloat and build an integer constant + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue C = DAG.getConstant(CFPNode->getValueAPF().bitcastToAPInt(), DL, + IVT); + + // Convert the Constant to the desired FP type + // FIXME We might be able to do the conversion during compilation and get rid + // of it from the object code + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, C); +} + +// If the Index operand is a constant, try to redirect the extract operation to +// the correct legalized vector. If not, bit-convert the input vector to +// equivalent integer vector. Extract the element as an (bit-cast) integer +// value and convert it to the promoted type. +SDValue DAGTypeLegalizer::PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc DL(N); + + // If the index is constant, try to extract the value from the legalized + // vector type. + if (isa<ConstantSDNode>(N->getOperand(1))) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec->getValueType(0); + EVT EltVT = VecVT.getVectorElementType(); + + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + switch (getTypeAction(VecVT)) { + default: break; + case TargetLowering::TypeScalarizeVector: { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeWidenVector: { + Vec = GetWidenedVector(Vec); + SDValue Res = DAG.getNode(N->getOpcode(), DL, EltVT, Vec, Idx); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + case TargetLowering::TypeSplitVector: { + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + SDValue Res; + if (IdxVal < LoElts) + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Lo, Idx); + else + Res = DAG.getNode(N->getOpcode(), DL, EltVT, Hi, + DAG.getConstant(IdxVal - LoElts, DL, + Idx.getValueType())); + ReplaceValueWith(SDValue(N, 0), Res); + return SDValue(); + } + + } + } + + // Bit-convert the input vector to the equivalent integer vector + SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); + EVT IVT = NewOp.getValueType().getVectorElementType(); + + // Extract the element as an (bit-cast) integer value + SDValue NewVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IVT, + NewOp, N->getOperand(1)); + + // Convert the element to the desired FP type + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, NewVal); +} + +// FCOPYSIGN(X, Y) returns the value of X with the sign of Y. If the result +// needs promotion, so does the argument X. Note that Y, if needed, will be +// handled during operand promotion. 
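+// Illustrative example (assuming f16 promotes to f32; not from the original
+// source):
+//   f16 = fcopysign f16 %x, f64 %y
+// becomes
+//   f32 = fcopysign (promoted f32 %x), f64 %y
+// with %y left untouched here.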
+SDValue DAGTypeLegalizer::PromoteFloatRes_FCOPYSIGN(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Unary operation where the result and the operand have PromoteFloat type +// action. Construct a new SDNode with the promoted float value of the old +// operand. +SDValue DAGTypeLegalizer::PromoteFloatRes_UnaryOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op); +} + +// Binary operations where the result and both operands have PromoteFloat type +// action. Construct a new SDNode with the promoted float values of the old +// operands. +SDValue DAGTypeLegalizer::PromoteFloatRes_BinOp(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, N->getFlags()); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_FMAD(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = GetPromotedFloat(N->getOperand(1)); + SDValue Op2 = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1, Op2); +} + +// Promote the Float (first) operand and retain the Integer (second) operand +SDValue DAGTypeLegalizer::PromoteFloatRes_FPOWI(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op0 = GetPromotedFloat(N->getOperand(0)); + SDValue Op1 = N->getOperand(1); + + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); +} + +// Explicit operation to reduce precision. Reduce the value to half precision +// and promote it back to the legal type. 
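+// Illustrative example (assuming f16 promotes to f32; not from the original
+// source):
+//   f16 = fp_round f32 %x
+// becomes
+//   f32 = fp16_to_fp (fp_to_fp16 f32 %x)
+// so the narrowing to half precision is still performed exactly once.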
+SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { + SDLoc DL(N); + + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + EVT OpVT = Op->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + + // Round promoted float to desired precision + SDValue Round = DAG.getNode(GetPromotionOpcode(OpVT, VT), DL, IVT, Op); + // Promote it back to the legal output type + return DAG.getNode(GetPromotionOpcode(VT, NVT), DL, NVT, Round); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) { + LoadSDNode *L = cast<LoadSDNode>(N); + EVT VT = N->getValueType(0); + + // Load the value as an integer value with the same number of bits + EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); + SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + IVT, SDLoc(N), L->getChain(), L->getBasePtr(), + L->getOffset(), L->getPointerInfo(), IVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getAAInfo()); + // Legalize the chain result by replacing uses of the old value chain with the + // new one + ReplaceValueWith(SDValue(N, 1), newL.getValue(1)); + + // Convert the integer value to the desired FP type + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(GetPromotionOpcode(VT, NVT), SDLoc(N), NVT, newL); +} + +// Construct a new SELECT node with the promoted true- and false- values. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(1)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(2)); + + return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0), + N->getOperand(0), TrueVal, FalseVal); +} + +// Construct a new SELECT_CC node with the promoted true- and false- values. +// The operands used for comparison are promoted by PromoteFloatOp_SELECT_CC. +SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT_CC(SDNode *N) { + SDValue TrueVal = GetPromotedFloat(N->getOperand(2)); + SDValue FalseVal = GetPromotedFloat(N->getOperand(3)); + + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N->getValueType(0), + N->getOperand(0), N->getOperand(1), TrueVal, FalseVal, + N->getOperand(4)); +} + +// Construct a SDNode that transforms the SINT or UINT operand to the promoted +// float type. +SDValue DAGTypeLegalizer::PromoteFloatRes_XINT_TO_FP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp new file mode 100644 index 0000000..cd114d6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -0,0 +1,3323 @@ +//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements integer type expansion and promotion for LegalizeTypes. 
+// Promotion is the act of changing a computation in an illegal type into a +// computation in a larger type. For example, implementing i8 arithmetic in an +// i32 register (often needed on powerpc). +// Expansion is the act of changing a computation in an illegal type into a +// computation in two identical registers of a smaller type. For example, +// implementing i64 arithmetic in two i32 registers (often needed on 32-bit +// targets). +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Integer Result Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerResult - This method is called when a result of a node is +/// found to be in need of promotion to a larger type. At this point, the node +/// may also have invalid operands or may have other results that need +/// expansion, we just know that (at least) one result needs promotion. +void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Promote integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "PromoteIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to promote this operator!"); + case ISD::MERGE_VALUES:Res = PromoteIntRes_MERGE_VALUES(N, ResNo); break; + case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break; + case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break; + case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break; + case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break; + case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break; + case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break; + case ISD::Constant: Res = PromoteIntRes_Constant(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntRes_CONVERT_RNDSAT(N); break; + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break; + case ISD::CTPOP: Res = PromoteIntRes_CTPOP(N); break; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break; + case ISD::EXTRACT_VECTOR_ELT: + Res = PromoteIntRes_EXTRACT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = PromoteIntRes_LOAD(cast<LoadSDNode>(N)); break; + case ISD::MLOAD: Res = PromoteIntRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: Res = PromoteIntRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; + case ISD::SELECT: Res = PromoteIntRes_SELECT(N); break; + case ISD::VSELECT: Res = PromoteIntRes_VSELECT(N); break; + case ISD::SELECT_CC: Res = PromoteIntRes_SELECT_CC(N); break; + case ISD::SETCC: Res = PromoteIntRes_SETCC(N); break; + case ISD::SMIN: + case ISD::SMAX: Res = PromoteIntRes_SExtIntBinOp(N); break; + case ISD::UMIN: + case ISD::UMAX: Res = PromoteIntRes_ZExtIntBinOp(N); break; + + case ISD::SHL: Res = PromoteIntRes_SHL(N); break; + case ISD::SIGN_EXTEND_INREG: + Res = PromoteIntRes_SIGN_EXTEND_INREG(N); break; + case ISD::SRA: Res = PromoteIntRes_SRA(N); break; + case ISD::SRL: Res = PromoteIntRes_SRL(N); break; + case 
ISD::TRUNCATE: Res = PromoteIntRes_TRUNCATE(N); break; + case ISD::UNDEF: Res = PromoteIntRes_UNDEF(N); break; + case ISD::VAARG: Res = PromoteIntRes_VAARG(N); break; + + case ISD::EXTRACT_SUBVECTOR: + Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break; + case ISD::VECTOR_SHUFFLE: + Res = PromoteIntRes_VECTOR_SHUFFLE(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break; + case ISD::BUILD_VECTOR: + Res = PromoteIntRes_BUILD_VECTOR(N); break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break; + case ISD::CONCAT_VECTORS: + Res = PromoteIntRes_CONCAT_VECTORS(N); break; + + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break; + + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break; + + case ISD::FP_TO_FP16: Res = PromoteIntRes_FP_TO_FP16(N); break; + + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break; + + case ISD::SDIV: + case ISD::SREM: Res = PromoteIntRes_SExtIntBinOp(N); break; + + case ISD::UDIV: + case ISD::UREM: Res = PromoteIntRes_ZExtIntBinOp(N); break; + + case ISD::SADDO: + case ISD::SSUBO: Res = PromoteIntRes_SADDSUBO(N, ResNo); break; + case ISD::UADDO: + case ISD::USUBO: Res = PromoteIntRes_UADDSUBO(N, ResNo); break; + case ISD::SMULO: + case ISD::UMULO: Res = PromoteIntRes_XMULO(N, ResNo); break; + + case ISD::ATOMIC_LOAD: + Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; + + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; + } + + // If the result is null then the sub-method took care of registering it. + if (Res.getNode()) + SetPromotedInteger(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetPromotedInteger(Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { + // Sign-extend the new bits, and continue the assertion. + SDValue Op = SExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertSext, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { + // Zero the new bits, and continue the assertion. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::AssertZext, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { + EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), + N->getMemoryVT(), ResVT, + N->getChain(), N->getBasePtr(), + N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), + N->getMemoryVT(), + N->getChain(), N->getBasePtr(), + Op2, N->getMemOperand(), N->getOrdering(), + N->getSynchScope()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), + N->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); + SDValue Op3 = GetPromotedInteger(N->getOperand(3)); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); + // Update the use to N with the newly created Res. + for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i) + ReplaceValueWith(SDValue(N, i), Res.getValue(i)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDLoc dl(N); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector() && !NInVT.isVector()) + // The input promotes to the same size. Convert the promoted value. + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp)); + break; + case TargetLowering::TypeSoftenFloat: + // Promote the integer operand by hand. + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp)); + case TargetLowering::TypePromoteFloat: { + // Convert the promoted float by hand. + if (NOutVT.bitsEq(NInVT)) { + SDValue PromotedOp = GetPromotedFloat(InOp); + SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp); + return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc, + DAG.getValueType(OutVT)); + } + break; + } + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + break; + case TargetLowering::TypeScalarizeVector: + // Convert the element to an integer and promote it by hand. 
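+    // Illustrative example (not from the original source): for
+    //   i16 = bitcast v1i16 %v   (with i16 promoting to i32)
+    // the scalarized i16 element is bit-converted to an integer and then
+    // any-extended to i32.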
+ if (!NOutVT.isVector()) + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + BitConvertToInteger(GetScalarizedVector(InOp))); + break; + case TargetLowering::TypeSplitVector: { + // For example, i32 = BITCAST v2i16 on alpha. Convert the split + // pieces of the input into integers and reassemble in the final type. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + InOp = DAG.getNode(ISD::ANY_EXTEND, dl, + EVT::getIntegerVT(*DAG.getContext(), + NOutVT.getSizeInBits()), + JoinIntegers(Lo, Hi)); + return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp); + } + case TargetLowering::TypeWidenVector: + // The input is widened to the same size. Convert to the widened value. + // Make sure that the outgoing value is not a vector, because this would + // make us bitcast between two vectors which are legalized in different ways. + if (NOutVT.bitsEq(NInVT) && !NOutVT.isVector()) + return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetWidenedVector(InOp)); + } + + return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, + CreateStackStoreLoad(InOp, OutVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + + unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(); + return DAG.getNode( + ISD::SRL, dl, NVT, DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), + DAG.getConstant(DiffBits, dl, + TLI.getShiftAmountTy(NVT, DAG.getDataLayout()))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { + // The pair element type may be legal, or may not promote to the same type as + // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), + TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0)), JoinIntegers(N->getOperand(0), + N->getOperand(1))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { + EVT VT = N->getValueType(0); + // FIXME there is no actual debug info here + SDLoc dl(N); + // Zero extend things like i1, sign extend everything else. It shouldn't + // matter in theory which one we pick, but this tends to give better code? + unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + SDValue Result = DAG.getNode(Opc, dl, + TLI.getTypeToTransformTo(*DAG.getContext(), VT), + SDValue(N, 0)); + assert(isa<ConstantSDNode>(Result) && "Didn't constant fold ext?"); + return Result; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && + "can only promote integers"); + EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + SDLoc dl(N); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); + // Subtract off the extra leading bits in the bigger type. + return DAG.getNode( + ISD::SUB, dl, NVT, Op, + DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, + NVT)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { + // Zero extend to the promoted type and do the count there. + SDValue Op = ZExtPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + EVT OVT = N->getValueType(0); + EVT NVT = Op.getValueType(); + SDLoc dl(N); + if (N->getOpcode() == ISD::CTTZ) { + // The count is the same in the promoted type except if the original + // value was zero. This can be handled by setting the bit just off + // the top of the original type. + auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(), + OVT.getScalarSizeInBits()); + Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT)); + } + return DAG.getNode(N->getOpcode(), dl, NVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc dl(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), + N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NewOpc = N->getOpcode(); + SDLoc dl(N); + + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is + // preferable. We choose SINT because that's the right thing on PPC.) + if (N->getOpcode() == ISD::FP_TO_UINT && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && + TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) + NewOpc = ISD::FP_TO_SINT; + + SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0)); + + // Assert that the converted value fits in the original type. If it doesn't + // (eg: because the value being converted is too big), then the result of the + // original operation was undefined anyway, so the assert is still correct. + return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ? 
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res, + DAG.getValueType(N->getValueType(0).getScalarType())); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_FP16(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); + + return DAG.getNode(ISD::AssertZext, dl, + NVT, Res, DAG.getValueType(N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + + if (getTypeAction(N->getOperand(0).getValueType()) + == TargetLowering::TypePromoteInteger) { + SDValue Res = GetPromotedInteger(N->getOperand(0)); + assert(Res.getValueType().bitsLE(NVT) && "Extension doesn't make sense!"); + + // If the result and operand types are the same after promotion, simplify + // to an in-register extension. + if (NVT == Res.getValueType()) { + // The high bits are not guaranteed to be anything. Insert an extend. + if (N->getOpcode() == ISD::SIGN_EXTEND) + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(N->getOperand(0).getValueType())); + if (N->getOpcode() == ISD::ZERO_EXTEND) + return DAG.getZeroExtendInReg(Res, dl, + N->getOperand(0).getValueType().getScalarType()); + assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!"); + return Res; + } + } + + // Otherwise, just extend the original operand all the way to the larger type. + return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + ISD::LoadExtType ExtType = + ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); + SDLoc dl(N); + SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + N->getMemoryVT(), N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getSrc0()); + + SDLoc dl(N); + SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(), + N->getMask(), ExtSrc0, N->getMemoryVT(), + N->getMemOperand(), ISD::SEXTLOAD); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue ExtSrc0 = GetPromotedInteger(N->getValue()); + assert(NVT == ExtSrc0.getValueType() && + "Gather result type and the passThru agrument type should be the same"); + + SDLoc dl(N); + SDValue Ops[] = {N->getChain(), ExtSrc0, N->getMask(), N->getBasePtr(), + N->getIndex()}; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +/// Promote the overflow flag of an overflowing arithmetic node. 
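+/// Illustrative example (not from the original source, and assuming the
+/// promoted boolean type is i8):
+///   {i8, i1} = saddo i8 %a, i8 %b
+/// is rebuilt as
+///   {i8, i8} = saddo i8 %a, i8 %b
+/// i.e. only the type of the overflow result changes here; promoting the
+/// arithmetic result, if needed, is handled separately.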
+SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { + // Simply change the return type of the boolean result. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT ValueVTs[] = { N->getValueType(0), NVT }; + SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), + DAG.getVTList(ValueVTs), Ops); + + // Modified the sum result - switch anything that used the old sum to use + // the new one. + ReplaceValueWith(SDValue(N, 0), Res); + + return SDValue(Res.getNode(), 1); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // sign extension of its truncation to the original type. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + SDLoc dl(N); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: sign extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res, + DAG.getValueType(OVT)); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { + SDValue Mask = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + Mask = PromoteTargetBoolean(Mask, OpTy); + SDValue LHS = GetPromotedInteger(N->getOperand(1)); + SDValue RHS = GetPromotedInteger(N->getOperand(2)); + return DAG.getNode(ISD::VSELECT, SDLoc(N), + LHS.getValueType(), Mask, LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetPromotedInteger(N->getOperand(2)); + SDValue RHS = GetPromotedInteger(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + LHS.getValueType(), N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { + EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); + + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). 
+ if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDLoc dl(N); + assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && + "Vector compare must return a vector result!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getValueType() != RHS.getValueType()) { + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && + !LHS.getValueType().isVector()) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && + !RHS.getValueType().isVector()) + RHS = GetPromotedInteger(RHS); + } + + // Get the SETCC result using the canonical SETCC type. + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, + N->getOperand(2)); + + assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); + // Convert to the expected type. + return DAG.getNode(ISD::TRUNCATE, dl, NVT, SetCC); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SHL, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { + // The input may have strange things in the top bits of the registers, but + // these operations don't care. They may have weird bits going out, but + // that too is okay if they are integer operations. + SDValue LHS = GetPromotedInteger(N->getOperand(0)); + SDValue RHS = GetPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SExtIntBinOp(SDNode *N) { + // Sign extend the input. + SDValue LHS = SExtPromotedInteger(N->getOperand(0)); + SDValue RHS = SExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_ZExtIntBinOp(SDNode *N) { + // Zero extend the input. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + // The input value must be properly sign extended. + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = SExtPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRA, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + // The input value must be properly zero extended. 
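+  // Illustrative note (not from the original source): an i8 srl done in i32
+  // must first clear bits 8-31 of the shifted value, otherwise stale high
+  // bits would be shifted down into the low 8 bits of the result.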
+ if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger) + LHS = ZExtPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger) + RHS = ZExtPromotedInteger(RHS); + return DAG.getNode(ISD::SRL, SDLoc(N), LHS.getValueType(), LHS, RHS); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res; + SDValue InOp = N->getOperand(0); + SDLoc dl(N); + + switch (getTypeAction(InOp.getValueType())) { + default: llvm_unreachable("Unknown type action!"); + case TargetLowering::TypeLegal: + case TargetLowering::TypeExpandInteger: + Res = InOp; + break; + case TargetLowering::TypePromoteInteger: + Res = GetPromotedInteger(InOp); + break; + case TargetLowering::TypeSplitVector: + EVT InVT = InOp.getValueType(); + assert(InVT.isVector() && "Cannot split scalar types"); + unsigned NumElts = InVT.getVectorNumElements(); + assert(NumElts == NVT.getVectorNumElements() && + "Dst and Src must have the same number of elements"); + assert(isPowerOf2_32(NumElts) && + "Promoted vector type must be a power of two"); + + SDValue EOp1, EOp2; + GetSplitVector(InOp, EOp1, EOp2); + + EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(), + NumElts/2); + EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1); + EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2); + } + + // Truncate to NVT instead of VT + return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + // The operation overflowed iff the result in the larger type is not the + // zero extension of its truncation to the original type. + SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); + SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = LHS.getValueType(); + SDLoc dl(N); + + // Do the arithmetic in the larger type. + unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; + SDValue Res = DAG.getNode(Opcode, dl, NVT, LHS, RHS); + + // Calculate the overflow flag: zero extend the arithmetic result from + // the original type. + SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT); + // Overflowed if and only if this is not equal to Res. + Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); + + return Res; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { + // Promote the overflow bit trivially. + if (ResNo == 1) + return PromoteIntRes_Overflow(N); + + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + SDLoc DL(N); + EVT SmallVT = LHS.getValueType(); + + // To determine if the result overflowed in a larger type, we extend the + // input to the larger type, do the multiply (checking if it overflows), + // then also check the high bits of the result to see if overflow happened + // there. 
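+  // Worked example (illustrative, not from the original source): for an i8
+  // umulo of 200 * 2, the multiply is done as i32 and yields 400; the high
+  // part 400 >> 8 == 1 is non-zero, so overflow is reported even though the
+  // i32 multiply itself did not overflow.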
+ if (N->getOpcode() == ISD::SMULO) { + LHS = SExtPromotedInteger(LHS); + RHS = SExtPromotedInteger(RHS); + } else { + LHS = ZExtPromotedInteger(LHS); + RHS = ZExtPromotedInteger(RHS); + } + SDVTList VTs = DAG.getVTList(LHS.getValueType(), N->getValueType(1)); + SDValue Mul = DAG.getNode(N->getOpcode(), DL, VTs, LHS, RHS); + + // Overflow occurred if it occurred in the larger type, or if the high part + // of the result does not zero/sign-extend the low part. Check this second + // possibility first. + SDValue Overflow; + if (N->getOpcode() == ISD::UMULO) { + // Unsigned overflow occurred if the high part is non-zero. + SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul, + DAG.getIntPtrConstant(SmallVT.getSizeInBits(), + DL)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi, + DAG.getConstant(0, DL, Hi.getValueType()), + ISD::SETNE); + } else { + // Signed overflow occurred if the high part does not sign extend the low. + SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(), + Mul, DAG.getValueType(SmallVT)); + Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE); + } + + // The only other way for overflow to occur is if the multiplication in the + // larger type itself overflowed. + Overflow = DAG.getNode(ISD::OR, DL, N->getValueType(1), Overflow, + SDValue(Mul.getNode(), 1)); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Overflow); + return Mul; +} + +SDValue DAGTypeLegalizer::PromoteIntRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(TLI.getTypeToTransformTo(*DAG.getContext(), + N->getValueType(0))); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { + SDValue Chain = N->getOperand(0); // Get the chain. + SDValue Ptr = N->getOperand(1); // Get the pointer. + EVT VT = N->getValueType(0); + SDLoc dl(N); + + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); + // The argument is passed as NumRegs registers of type RegVT. + + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned i = 0; i < NumRegs; ++i) { + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); + Chain = Parts[i].getValue(1); + } + + // Handle endianness of the load. + if (DAG.getDataLayout().isBigEndian()) + std::reverse(Parts.begin(), Parts.end()); + + // Assemble the parts in the promoted type. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Res = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[0]); + for (unsigned i = 1; i < NumRegs; ++i) { + SDValue Part = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Parts[i]); + // Shift it to the right position and "or" it in. + Part = DAG.getNode(ISD::SHL, dl, NVT, Part, + DAG.getConstant(i * RegVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Res = DAG.getNode(ISD::OR, dl, NVT, Res, Part); + } + + // Modified the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Chain); + + return Res; +} + +//===----------------------------------------------------------------------===// +// Integer Operand Promotion +//===----------------------------------------------------------------------===// + +/// PromoteIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need promotion. 
At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. +bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Promote integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "PromoteIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to promote this operator's operand!"); + + case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break; + case ISD::ATOMIC_STORE: + Res = PromoteIntOp_ATOMIC_STORE(cast<AtomicSDNode>(N)); + break; + case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break; + case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break; + case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break; + case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break; + case ISD::BUILD_VECTOR: Res = PromoteIntOp_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = PromoteIntOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = PromoteIntOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONVERT_RNDSAT: + Res = PromoteIntOp_CONVERT_RNDSAT(N); break; + case ISD::INSERT_VECTOR_ELT: + Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break; + case ISD::SCALAR_TO_VECTOR: + Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break; + case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break; + case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break; + case ISD::SIGN_EXTEND: Res = PromoteIntOp_SIGN_EXTEND(N); break; + case ISD::SINT_TO_FP: Res = PromoteIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = PromoteIntOp_STORE(cast<StoreSDNode>(N), + OpNo); break; + case ISD::MSTORE: Res = PromoteIntOp_MSTORE(cast<MaskedStoreSDNode>(N), + OpNo); break; + case ISD::MLOAD: Res = PromoteIntOp_MLOAD(cast<MaskedLoadSDNode>(N), + OpNo); break; + case ISD::MGATHER: Res = PromoteIntOp_MGATHER(cast<MaskedGatherSDNode>(N), + OpNo); break; + case ISD::MSCATTER: Res = PromoteIntOp_MSCATTER(cast<MaskedScatterSDNode>(N), + OpNo); break; + case ISD::TRUNCATE: Res = PromoteIntOp_TRUNCATE(N); break; + case ISD::FP16_TO_FP: + case ISD::UINT_TO_FP: Res = PromoteIntOp_UINT_TO_FP(N); break; + case ISD::ZERO_EXTEND: Res = PromoteIntOp_ZERO_EXTEND(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = PromoteIntOp_EXTRACT_SUBVECTOR(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = PromoteIntOp_Shift(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// PromoteSetCCOperands - Promote the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, + ISD::CondCode CCCode) { + // We have to insert explicit sign or zero extends. 
Note that we could
+  // insert sign extends for ALL conditions, but zero extend is cheaper on
+  // many machines (an AND instead of two shifts), so prefer it.
+  switch (CCCode) {
+  default: llvm_unreachable("Unknown integer comparison!");
+  case ISD::SETEQ:
+  case ISD::SETNE: {
+    SDValue OpL = GetPromotedInteger(NewLHS);
+    SDValue OpR = GetPromotedInteger(NewRHS);
+
+    // We would prefer to promote the comparison operands with sign extension
+    // if we find that each operand is actually the truncation of an
+    // AssertSext. With this optimization we avoid inserting a real truncate
+    // instruction, which would otherwise be redundant.
+    if (OpL->getOpcode() == ISD::AssertSext &&
+        cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
+        OpR->getOpcode() == ISD::AssertSext &&
+        cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+      NewLHS = OpL;
+      NewRHS = OpR;
+    } else {
+      NewLHS = ZExtPromotedInteger(NewLHS);
+      NewRHS = ZExtPromotedInteger(NewRHS);
+    }
+    break;
+  }
+  case ISD::SETUGE:
+  case ISD::SETUGT:
+  case ISD::SETULE:
+  case ISD::SETULT:
+    // ALL of these operations will work if we either sign or zero extend
+    // the operands (including the unsigned comparisons!). Zero extend is
+    // usually a simpler/cheaper operation, so prefer it.
+    NewLHS = ZExtPromotedInteger(NewLHS);
+    NewRHS = ZExtPromotedInteger(NewRHS);
+    break;
+  case ISD::SETGE:
+  case ISD::SETGT:
+  case ISD::SETLT:
+  case ISD::SETLE:
+    NewLHS = SExtPromotedInteger(NewLHS);
+    NewRHS = SExtPromotedInteger(NewRHS);
+    break;
+  }
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
+  SDValue Op = GetPromotedInteger(N->getOperand(0));
+  return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
+  SDValue Op2 = GetPromotedInteger(N->getOperand(2));
+  return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
+                       N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
+                       N->getOrdering(), N->getSynchScope());
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
+  // This should only occur in unusual situations like bitcasting to an
+  // x86_fp80, so just turn it into a store+load
+  return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 2 && "Don't know how to promote this operand!");
+
+  SDValue LHS = N->getOperand(2);
+  SDValue RHS = N->getOperand(3);
+  PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(1))->get());
+
+  // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always
+  // legal types.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
+                                        N->getOperand(1), LHS, RHS, N->getOperand(4)),
+                 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) {
+  assert(OpNo == 1 && "only know how to promote condition");
+
+  // Promote all the way up to the canonical SetCC type.
+  SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other);
+
+  // The chain (Op#0) and basic block destination (Op#2) are always legal types.
+  return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond,
+                                        N->getOperand(2)), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) {
+  // Since the result type is legal, the operands must promote to it.
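+  // For example, a BUILD_PAIR of two i8 halves forming a legal i16: zero
+  // extend the low half, shift the promoted high half left by 8 and OR the
+  // two together.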
+ EVT OVT = N->getOperand(0).getValueType(); + SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); + SDValue Hi = GetPromotedInteger(N->getOperand(1)); + assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); + SDLoc dl(N); + + Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, + DAG.getConstant(OVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::OR, dl, N->getValueType(0), Lo, Hi); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type is not. This implies + // that the vector is a power-of-two in length and that the element + // type does not have a strange size (eg: it is not i1). + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); + + // Promote the inserted value. The type does not need to match the + // vector element type. Check that any extra bits introduced will be + // truncated away. + assert(N->getOperand(0).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + + SmallVector<SDValue, 16> NewOps; + for (unsigned i = 0; i < NumElts; ++i) + NewOps.push_back(GetPromotedInteger(N->getOperand(i))); + + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + assert ((CvtCode == ISD::CVT_SS || CvtCode == ISD::CVT_SU || + CvtCode == ISD::CVT_US || CvtCode == ISD::CVT_UU || + CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && + "can only promote integer arguments"); + SDValue InOp = GetPromotedInteger(N->getOperand(0)); + return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, + N->getOperand(1), N->getOperand(2), + N->getOperand(3), N->getOperand(4), CvtCode); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, + unsigned OpNo) { + if (OpNo == 1) { + // Promote the inserted value. This is valid because the type does not + // have to match the vector element type. + + // Check that any extra bits introduced will be truncated away. + assert(N->getOperand(1).getValueType().getSizeInBits() >= + N->getValueType(0).getVectorElementType().getSizeInBits() && + "Type of inserted value narrower than vector element type!"); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + GetPromotedInteger(N->getOperand(1)), + N->getOperand(2)), + 0); + } + + assert(OpNo == 2 && "Different operand and result vector types?"); + + // Promote the index. + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy(DAG.getDataLayout())); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { + // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote + // the operand in place. + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Only know how to promote the condition!"); + SDValue Cond = N->getOperand(0); + EVT OpTy = N->getOperand(1).getValueType(); + + // Promote all the way up to the canonical SetCC type. + EVT OpVT = N->getOpcode() == ISD::SELECT ? 
OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); + + return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), + N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(4))->get()); + + // The CC (#4) and the possible return values (#2 and #3) have legal types. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { + assert(OpNo == 0 && "Don't know how to promote this operand!"); + + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + PromoteSetCCOperands(LHS, RHS, cast<CondCodeSDNode>(N->getOperand(2))->get()); + + // The CC (#2) is always legal. + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + SDLoc dl(N); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), + Op, DAG.getValueType(N->getOperand(0).getValueType())); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); + SDLoc dl(N); + + SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. + + // Truncate the value and store the result. + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + + SDValue DataOp = N->getValue(); + EVT DataVT = DataOp.getValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(N); + + bool TruncateStore = false; + if (OpNo == 2) { + // Mask comes before the data operand. If the data operand is legal, we just + // promote the mask. + // When the data operand has illegal type, we should legalize the data + // operand first. The mask will be promoted/splitted/widened according to + // the data operand type. 
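+    // In that case, re-dispatch on the data operand (operand 3); the mask is
+    // then legalized to match whatever form the data operand takes.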
+ if (TLI.isTypeLegal(DataVT)) + Mask = PromoteTargetBoolean(Mask, DataVT); + else { + if (getTypeAction(DataVT) == TargetLowering::TypePromoteInteger) + return PromoteIntOp_MSTORE(N, 3); + + else if (getTypeAction(DataVT) == TargetLowering::TypeWidenVector) + return WidenVecOp_MSTORE(N, 3); + + else { + assert (getTypeAction(DataVT) == TargetLowering::TypeSplitVector); + return SplitVecOp_MSTORE(N, 3); + } + } + } else { // Data operand + assert(OpNo == 3 && "Unexpected operand for promotion"); + DataOp = GetPromotedInteger(DataOp); + Mask = PromoteTargetBoolean(Mask, DataOp.getValueType()); + TruncateStore = true; + } + + return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask, + N->getMemoryVT(), N->getMemOperand(), + TruncateStore); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N, + unsigned OpNo) { + assert(OpNo == 2 && "Only know how to promote the mask!"); + EVT DataVT = N->getValueType(0); + SDValue Mask = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end()); + NewOps[OpNo] = Mask; + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N, + unsigned OpNo) { + + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValueType(0); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SmallVector<SDValue, 5> NewOps(N->op_begin(), N->op_end()); + if (OpNo == 2) { + // The Mask + EVT DataVT = N->getValue().getValueType(); + NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT); + } else + NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { + SDValue Op = GetPromotedInteger(N->getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { + SDLoc dl(N); + SDValue Op = GetPromotedInteger(N->getOperand(0)); + Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); + return DAG.getZeroExtendInReg(Op, dl, + N->getOperand(0).getValueType().getScalarType()); +} + + +//===----------------------------------------------------------------------===// +// Integer Result Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerResult - This method is called when the specified result of the +/// specified node is found to need expansion. At this point, the node may also +/// have invalid operands or may have other results that need promotion, we just +/// know that (at least) one result needs expansion. +void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Expand integer result: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Lo, Hi; + Lo = Hi = SDValue(); + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ExpandIntegerResult #" << ResNo << ": "; + N->dump(&DAG); dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to expand the result of this operator!"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + + case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break; + case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break; + case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break; + + case ISD::ANY_EXTEND: ExpandIntRes_ANY_EXTEND(N, Lo, Hi); break; + case ISD::AssertSext: ExpandIntRes_AssertSext(N, Lo, Hi); break; + case ISD::AssertZext: ExpandIntRes_AssertZext(N, Lo, Hi); break; + case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break; + case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break; + case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break; + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break; + case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break; + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break; + case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break; + case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break; + case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break; + case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break; + case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break; + case ISD::SDIV: ExpandIntRes_SDIV(N, Lo, Hi); break; + case ISD::SIGN_EXTEND: ExpandIntRes_SIGN_EXTEND(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: ExpandIntRes_SIGN_EXTEND_INREG(N, Lo, Hi); break; + case ISD::SREM: ExpandIntRes_SREM(N, Lo, Hi); break; + case ISD::TRUNCATE: ExpandIntRes_TRUNCATE(N, Lo, Hi); break; + case ISD::UDIV: ExpandIntRes_UDIV(N, Lo, Hi); break; + case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break; + case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break; + case ISD::ATOMIC_LOAD: ExpandIntRes_ATOMIC_LOAD(N, Lo, Hi); break; + + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { + std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N); + SplitInteger(Tmp.first, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Tmp.second); + break; + } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), + AN->getSynchScope()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. 
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } + + case ISD::AND: + case ISD::OR: + case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break; + + case ISD::ADD: + case ISD::SUB: ExpandIntRes_ADDSUB(N, Lo, Hi); break; + + case ISD::ADDC: + case ISD::SUBC: ExpandIntRes_ADDSUBC(N, Lo, Hi); break; + + case ISD::ADDE: + case ISD::SUBE: ExpandIntRes_ADDSUBE(N, Lo, Hi); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; + case ISD::UMULO: + case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. + if (Lo.getNode()) + SetExpandedInteger(SDValue(N, ResNo), Lo, Hi); +} + +/// Lower an atomic node to the appropriate builtin call. +std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC = RTLIB::getATOMIC(Opc, VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected atomic op or value type!"); + + return ExpandChainLibCall(LC, Node, false); +} + +/// N is a shift by a value that needs to be expanded, +/// and the shift amount is a constant 'Amt'. Expand the operation. +void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, const APInt &Amt, + SDValue &Lo, SDValue &Hi) { + SDLoc DL(N); + // Expand the incoming operand to be shifted, so that we have its parts + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // Though Amt shouldn't usually be 0, it's possible. E.g. when legalization + // splitted a vector shift, like this: <op1, op2> SHL <0, 2>. + if (!Amt) { + Lo = InL; + Hi = InH; + return; + } + + EVT NVT = InL.getValueType(); + unsigned VTBits = N->getValueType(0).getSizeInBits(); + unsigned NVTBits = NVT.getSizeInBits(); + EVT ShTy = N->getOperand(1).getValueType(); + + if (N->getOpcode() == ISD::SHL) { + if (Amt.ugt(VTBits)) { + Lo = Hi = DAG.getConstant(0, DL, NVT); + } else if (Amt.ugt(NVTBits)) { + Lo = DAG.getConstant(0, DL, NVT); + Hi = DAG.getNode(ISD::SHL, DL, + NVT, InL, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + } else if (Amt == NVTBits) { + Lo = DAG.getConstant(0, DL, NVT); + Hi = InL; + } else if (Amt == 1 && + TLI.isOperationLegalOrCustom(ISD::ADDC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) { + // Emit this X << 1 as X+X. 
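+      // Add the low half to itself with ADDC to produce the carry-out, then
+      // add the high half to itself with ADDE to fold that carry back in.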
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + SDValue LoOps[2] = { InL, InL }; + Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps); + SDValue HiOps[3] = { InH, InH, Lo.getValue(1) }; + Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, DL, ShTy)); + Hi = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(Amt, DL, ShTy)), + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); + } + return; + } + + if (N->getOpcode() == ISD::SRL) { + if (Amt.ugt(VTBits)) { + Lo = Hi = DAG.getConstant(0, DL, NVT); + } else if (Amt.ugt(NVTBits)) { + Lo = DAG.getNode(ISD::SRL, DL, + NVT, InH, DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getConstant(0, DL, NVT); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getConstant(0, DL, NVT); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, DL, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); + Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); + } + return; + } + + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + if (Amt.ugt(VTBits)) { + Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits - 1, DL, ShTy)); + } else if (Amt.ugt(NVTBits)) { + Lo = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(Amt - NVTBits, DL, ShTy)); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits - 1, DL, ShTy)); + } else if (Amt == NVTBits) { + Lo = InH; + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, + DAG.getConstant(NVTBits - 1, DL, ShTy)); + } else { + Lo = DAG.getNode(ISD::OR, DL, NVT, + DAG.getNode(ISD::SRL, DL, NVT, InL, + DAG.getConstant(Amt, DL, ShTy)), + DAG.getNode(ISD::SHL, DL, NVT, InH, + DAG.getConstant(-Amt + NVTBits, DL, ShTy))); + Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, DL, ShTy)); + } +} + +/// ExpandShiftWithKnownAmountBit - Try to determine whether we can simplify +/// this shift based on knowledge of the high bit of the shift amount. If we +/// can tell this, we know that it is >= 32 or < 32, without knowing the actual +/// shift amount. +bool DAGTypeLegalizer:: +ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned ShBits = ShTy.getScalarType().getSizeInBits(); + unsigned NVTBits = NVT.getScalarType().getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + SDLoc dl(N); + + APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(N->getOperand(1), KnownZero, KnownOne); + + // If we don't know anything about the high bits, exit. + if (((KnownZero|KnownOne) & HighBitMask) == 0) + return false; + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + // If we know that any of the high bits of the shift amount are one, then we + // can do this as a couple of simple shifts. + if (KnownOne.intersects(HighBitMask)) { + // Mask out the high bit, which we know is set. 
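+    // For a 64-bit shift expanded into 32-bit halves this leaves Amt & 31,
+    // and the whole shift collapses to a single 32-bit shift of the
+    // appropriate half, with the other half filled with zeros or sign bits.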
+ Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, + DAG.getConstant(~HighBitMask, dl, ShTy)); + + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + Lo = DAG.getConstant(0, dl, NVT); // Low part is zero. + Hi = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part. + return true; + case ISD::SRL: + Hi = DAG.getConstant(0, dl, NVT); // Hi part is zero. + Lo = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + case ISD::SRA: + Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part. + DAG.getConstant(NVTBits - 1, dl, ShTy)); + Lo = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part. + return true; + } + } + + // If we know that all of the high bits of the shift amount are zero, then we + // can do this as a couple of simple shifts. + if ((KnownZero & HighBitMask) == HighBitMask) { + // Calculate 31-x. 31 is used instead of 32 to avoid creating an undefined + // shift if x is zero. We can use XOR here because x is known to be smaller + // than 32. + SDValue Amt2 = DAG.getNode(ISD::XOR, dl, ShTy, Amt, + DAG.getConstant(NVTBits - 1, dl, ShTy)); + + unsigned Op1, Op2; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: Op1 = ISD::SHL; Op2 = ISD::SRL; break; + case ISD::SRL: + case ISD::SRA: Op1 = ISD::SRL; Op2 = ISD::SHL; break; + } + + // When shifting right the arithmetic for Lo and Hi is swapped. + if (N->getOpcode() != ISD::SHL) + std::swap(InL, InH); + + // Use a little trick to get the bits that move from Lo to Hi. First + // shift by one bit. + SDValue Sh1 = DAG.getNode(Op2, dl, NVT, InL, DAG.getConstant(1, dl, ShTy)); + // Then compute the remaining shift with amount-1. + SDValue Sh2 = DAG.getNode(Op2, dl, NVT, Sh1, Amt2); + + Lo = DAG.getNode(N->getOpcode(), dl, NVT, InL, Amt); + Hi = DAG.getNode(ISD::OR, dl, NVT, DAG.getNode(Op1, dl, NVT, InH, Amt),Sh2); + + if (N->getOpcode() != ISD::SHL) + std::swap(Hi, Lo); + return true; + } + + return false; +} + +/// ExpandShiftWithUnknownAmountBit - Fully general expansion of integer shift +/// of any size. +bool DAGTypeLegalizer:: +ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDValue Amt = N->getOperand(1); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ShTy = Amt.getValueType(); + unsigned NVTBits = NVT.getSizeInBits(); + assert(isPowerOf2_32(NVTBits) && + "Expanded integer type size not a power of two!"); + SDLoc dl(N); + + // Get the incoming operand to be shifted. + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + + SDValue NVBitsNode = DAG.getConstant(NVTBits, dl, ShTy); + SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); + SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); + SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, NVBitsNode, ISD::SETULT); + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(ShTy), + Amt, DAG.getConstant(0, dl, ShTy), + ISD::SETEQ); + + SDValue LoS, HiS, LoL, HiL; + switch (N->getOpcode()) { + default: llvm_unreachable("Unknown shift"); + case ISD::SHL: + // Short: ShAmt < NVTBits + LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); + HiS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SHL, dl, NVT, InH, Amt), + DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack)); + + // Long: ShAmt >= NVTBits + LoL = DAG.getConstant(0, dl, NVT); // Lo part is zero. + HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. 
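+
+    // Select the short or long form at run time. The zero-amount case is
+    // handled separately because AmtLack would equal NVTBits and the
+    // InL >> AmtLack above would then be undefined.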
+ + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isZero, InH, + DAG.getSelect(dl, NVT, isShort, HiS, HiL)); + return true; + case ISD::SRL: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + // FIXME: If Amt is zero, the following shift generates an undefined result + // on some architectures. + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getConstant(0, dl, NVT); // Hi part is zero. + LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + return true; + case ISD::SRA: + // Short: ShAmt < NVTBits + HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); + LoS = DAG.getNode(ISD::OR, dl, NVT, + DAG.getNode(ISD::SRL, dl, NVT, InL, Amt), + DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); + + // Long: ShAmt >= NVTBits + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. + DAG.getConstant(NVTBits - 1, dl, ShTy)); + LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. + + Lo = DAG.getSelect(dl, NVT, isZero, InL, + DAG.getSelect(dl, NVT, isShort, LoS, LoL)); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); + return true; + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + + EVT NVT = LHSL.getValueType(); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + // Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support + // them. TODO: Teach operation legalization how to expand unsupported + // ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate + // a carry of type MVT::Glue, but there doesn't seem to be any way to + // generate a value of this type in the expanded code sequence. + bool hasCarry = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? + ISD::ADDC : ISD::SUBC, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + + if (hasCarry) { + SDVTList VTList = DAG.getVTList(NVT, MVT::Glue); + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); + } + return; + } + + bool hasOVF = + TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ? 
+ ISD::UADDO : ISD::USUBO, + TLI.getTypeToExpandTo(*DAG.getContext(), NVT)); + if (hasOVF) { + SDVTList VTList = DAG.getVTList(NVT, NVT); + TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(NVT); + int RevOpc; + if (N->getOpcode() == ISD::ADD) { + RevOpc = ISD::SUB; + Lo = DAG.getNode(ISD::UADDO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + } else { + RevOpc = ISD::ADD; + Lo = DAG.getNode(ISD::USUBO, dl, VTList, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + } + SDValue OVF = Lo.getValue(1); + + switch (BoolType) { + case TargetLoweringBase::UndefinedBooleanContent: + OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF); + // Fallthrough + case TargetLoweringBase::ZeroOrOneBooleanContent: + Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF); + break; + case TargetLoweringBase::ZeroOrNegativeOneBooleanContent: + Hi = DAG.getNode(RevOpc, dl, NVT, Hi, OVF); + } + return; + } + + if (N->getOpcode() == ISD::ADD) { + Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::ADD, dl, NVT, makeArrayRef(HiOps, 2)); + SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], + ISD::SETULT); + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], + ISD::SETULT); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, dl, NVT), Carry1); + Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); + } else { + Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps); + Hi = DAG.getNode(ISD::SUB, dl, NVT, makeArrayRef(HiOps, 2)); + SDValue Cmp = + DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), + LoOps[0], LoOps[1], ISD::SETULT); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, dl, NVT), + DAG.getConstant(0, dl, NVT)); + Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[2] = { LHSL, RHSL }; + SDValue HiOps[3] = { LHSH, RHSH }; + + if (N->getOpcode() == ISD::ADDC) { + Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps); + } else { + Lo = DAG.getNode(ISD::SUBC, dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps); + } + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + // Expand the subcomponents. + SDValue LHSL, LHSH, RHSL, RHSH; + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + GetExpandedInteger(N->getOperand(1), RHSL, RHSH); + SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); + SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) }; + SDValue HiOps[3] = { LHSH, RHSH }; + + Lo = DAG.getNode(N->getOpcode(), dl, VTList, LoOps); + HiOps[2] = Lo.getValue(1); + Hi = DAG.getNode(N->getOpcode(), dl, VTList, HiOps); + + // Legalized the flag result - switch anything that used the old flag to + // use the new one. 
+ ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + +void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is any extension of the input (which degenerates to a copy). + Lo = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Op); + Hi = DAG.getUNDEF(NVT); // The high part is undefined. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertSext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertSext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part replicates the sign bit of Lo, make it explicit. + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(NVTBits - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + unsigned NVTBits = NVT.getSizeInBits(); + unsigned EVTBits = EVT.getSizeInBits(); + + if (NVTBits < EVTBits) { + Hi = DAG.getNode(ISD::AssertZext, dl, NVT, Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + EVTBits - NVTBits))); + } else { + Lo = DAG.getNode(ISD::AssertZext, dl, NVT, Lo, DAG.getValueType(EVT)); + // The high part must be zero, make it explicit. + Hi = DAG.getConstant(0, dl, NVT); + } +} + +void DAGTypeLegalizer::ExpandIntRes_BITREVERSE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. + Lo = DAG.getNode(ISD::BITREVERSE, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BITREVERSE, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. 
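+  // Byte-swapping a double-wide value swaps the two halves and byte-swaps
+  // each half, hence the swapped Hi/Lo above.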
+ Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); + Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned NBitWidth = NVT.getSizeInBits(); + auto Constant = cast<ConstantSDNode>(N); + const APInt &Cst = Constant->getAPIntValue(); + bool IsTarget = Constant->isTargetOpcode(); + bool IsOpaque = Constant->isOpaque(); + SDLoc dl(N); + Lo = DAG.getConstant(Cst.trunc(NBitWidth), dl, NVT, IsTarget, IsOpaque); + Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), dl, NVT, IsTarget, + IsOpaque); +} + +void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, + DAG.getConstant(0, dl, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); + + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo), + DAG.getNode(ISD::CTPOP, dl, NVT, Hi)); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + // cttz (HiLo) -> Lo != 0 ? 
cttz(Lo) : (cttz(Hi)+32) + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT NVT = Lo.getValueType(); + + SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, + DAG.getConstant(0, dl, NVT), ISD::SETNE); + + SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); + SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); + + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), dl, + NVT))); + Hi = DAG.getConstant(0, dl, NVT); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + + SDValue Op = N->getOperand(0); + if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat) + Op = GetPromotedFloat(Op); + + RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, + SDValue &Lo, SDValue &Hi) { + if (ISD::isNormalLoad(N)) { + ExpandRes_NormalLoad(N, Lo, Hi); + return; + } + + assert(ISD::isUNINDEXEDLoad(N) && "Indexed load during type legalization!"); + + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + ISD::LoadExtType ExtType = N->getExtensionType(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); + AAMDNodes AAInfo = N->getAAInfo(); + SDLoc dl(N); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + EVT MemVT = N->getMemoryVT(); + + Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); + + // Remember the chain. + Ch = Lo.getValue(1); + + if (ExtType == ISD::SEXTLOAD) { + // The high part is obtained by SRA'ing all but one of the bits of the + // lo part. + unsigned LoSize = Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else if (ExtType == ISD::ZEXTLOAD) { + // The high part is just a zero. + Hi = DAG.getConstant(0, dl, NVT); + } else { + assert(ExtType == ISD::EXTLOAD && "Unknown extload!"); + // The high part is undefined. + Hi = DAG.getUNDEF(NVT); + } + } else if (DAG.getDataLayout().isLittleEndian()) { + // Little-endian - low bits are at low addresses. + Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. 
+ unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), NEVT, + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + } else { + // Big-endian - high bits are at low addresses. Favor aligned loads at + // the cost of some bit-fiddling. + EVT MemVT = N->getMemoryVT(); + unsigned EBytes = MemVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + + // Load both the high bits and maybe some of the low bits. + Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), + EVT::getIntegerVT(*DAG.getContext(), + MemVT.getSizeInBits() - ExcessBits), + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + // Load the rest of the low bits. + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer low bits from the bottom of Hi to the top of Lo. + Lo = DAG.getNode( + ISD::OR, dl, NVT, Lo, + DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); + // Move high bits to the right position in Hi. + Hi = DAG.getNode(ExtType == ISD::SEXTLOAD ? ISD::SRA : ISD::SRL, dl, NVT, + Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } + } + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Ch); +} + +void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), LH, RH); +} + +void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + SDValue LL, LH, RL, RH; + GetExpandedInteger(N->getOperand(0), LL, LH); + GetExpandedInteger(N->getOperand(1), RL, RH); + + if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH)) + return; + + // If nothing else, we can make a libcall. 
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::MUL_I16; + else if (VT == MVT::i32) + LC = RTLIB::MUL_I32; + else if (VT == MVT::i64) + LC = RTLIB::MUL_I64; + else if (VT == MVT::i128) + LC = RTLIB::MUL_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); + + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first, + Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_READCYCLECOUNTER(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc DL(N); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDVTList VTs = DAG.getVTList(NVT, NVT, MVT::Other); + SDValue R = DAG.getNode(N->getOpcode(), DL, VTs, N->getOperand(0)); + Lo = R.getValue(0); + Hi = R.getValue(1); + ReplaceValueWith(SDValue(N, 1), R.getValue(2)); +} + +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + SDLoc dl(Node); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(Node, 1), Cmp); +} + +void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::SDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::SDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::SDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); + + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // If we can emit an efficient shift operation, do so now. Check to see if + // the RHS is a constant. 
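+  // The strategy, in order of preference: shift by a constant amount, shift
+  // with a known high bit of the amount, a target SHL/SRL/SRA_PARTS node, a
+  // runtime library call, and finally the fully general expansion.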
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) + return ExpandShiftByConstant(N, CN->getAPIntValue(), Lo, Hi); + + // If we can determine that the high bit of the shift is zero or one, even if + // the low bits are variable, emit this shift in an optimized form. + if (ExpandShiftWithKnownAmountBit(N, Lo, Hi)) + return; + + // If this target supports shift_PARTS, use it. First, map to the _PARTS opc. + unsigned PartsOpc; + if (N->getOpcode() == ISD::SHL) { + PartsOpc = ISD::SHL_PARTS; + } else if (N->getOpcode() == ISD::SRL) { + PartsOpc = ISD::SRL_PARTS; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + PartsOpc = ISD::SRA_PARTS; + } + + // Next check to see if the target supports this SHL_PARTS operation or if it + // will custom expand it. + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT); + if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) || + Action == TargetLowering::Custom) { + // Expand the subcomponents. + SDValue LHSL, LHSH; + GetExpandedInteger(N->getOperand(0), LHSL, LHSH); + EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. + SDValue ShiftOp = N->getOperand(1); + EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout()); + assert(ShiftTy.getScalarType().getSizeInBits() >= + Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; + Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops); + Hi = Lo.getValue(1); + return; + } + + // Otherwise, emit a libcall. + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + bool isSigned; + if (N->getOpcode() == ISD::SHL) { + isSigned = false; /*sign irrelevant*/ + if (VT == MVT::i16) + LC = RTLIB::SHL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SHL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SHL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SHL_I128; + } else if (N->getOpcode() == ISD::SRL) { + isSigned = false; + if (VT == MVT::i16) + LC = RTLIB::SRL_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRL_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRL_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRL_I128; + } else { + assert(N->getOpcode() == ISD::SRA && "Unknown shift!"); + isSigned = true; + if (VT == MVT::i16) + LC = RTLIB::SRA_I16; + else if (VT == MVT::i32) + LC = RTLIB::SRA_I32; + else if (VT == MVT::i64) + LC = RTLIB::SRA_I64; + else if (VT == MVT::i128) + LC = RTLIB::SRA_I128; + } + + if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi); + return; + } + + if (!ExpandShiftWithUnknownAmountBit(N, Lo, Hi)) + llvm_unreachable("Unsupported shift!"); +} + +void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is sign extension of the input (degenerates to a copy). 
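+    // For example, i32 sign extended to an expanded i64: Lo is the i32 value
+    // itself and Hi is Lo >> 31 (arithmetic), replicating the sign bit.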
+ Lo = DAG.getNode(ISD::SIGN_EXTEND, dl, NVT, N->getOperand(0)); + // The high part is obtained by SRA'ing all but one of the bits of low part. + unsigned LoSize = NVT.getSizeInBits(); + Hi = DAG.getNode( + ISD::SRA, dl, NVT, Lo, + DAG.getConstant(LoSize - 1, dl, TLI.getPointerTy(DAG.getDataLayout()))); + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer:: +ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + GetExpandedInteger(N->getOperand(0), Lo, Hi); + EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); + + if (EVT.bitsLE(Lo.getValueType())) { + // sext_inreg the low part if needed. + Lo = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Lo.getValueType(), Lo, + N->getOperand(1)); + + // The high part gets the sign extension from the lo-part. This handles + // things like sextinreg V:i64 from i8. + Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo, + DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + } else { + // For example, extension of an i48 to an i64. Leave the low part alone, + // sext_inreg the high part. 
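Concretely, if the node is a sext_inreg from i48 on an i64 that expands into two i32 halves, the low half already holds valid bits and only the low 48 - 32 = 16 bits of the high half need sign-extending in place. A standalone sketch (assumes two's complement and the usual shift idiom; names illustrative):

#include <cstdint>

// Sign-extend the low `bits` bits of a 32-bit word in place (1 <= bits <= 32).
uint32_t sextInReg32(uint32_t x, unsigned bits) {
  unsigned s = 32 - bits;
  return (uint32_t)((int32_t)(x << s) >> s);
}

// sext_inreg from i48 on a value held as two i32 halves: Lo is untouched,
// Hi keeps only its low 16 meaningful bits.
void sextInReg_i48(uint32_t &lo, uint32_t &hi) {
  (void)lo;                  // low half is fully covered by the i48 field
  hi = sextInReg32(hi, 16);
}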
+ unsigned ExcessBits = + EVT.getSizeInBits() - Lo.getValueType().getSizeInBits(); + Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi, + DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), + ExcessBits))); + } +} + +void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::SDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::SDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::SREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::SREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::SREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); + + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), + N->getOperand(0), + DAG.getConstant(NVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDLoc dl(N); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // A divide for UMULO should be faster than a function call. + if (N->getOpcode() == ISD::UMULO) { + SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); + + SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); + SplitInteger(MUL, Lo, Hi); + + // A divide for UMULO will be faster than a function call. Select to + // make sure we aren't using 0. + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), + RHS, DAG.getConstant(0, dl, VT), ISD::SETEQ); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, dl, VT), RHS); + SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); + SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, + ISD::SETNE); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, dl, N->getValueType(1)), + Overflow); + ReplaceValueWith(SDValue(N, 1), Overflow); + return; + } + + Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); + EVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); + + // Replace this with a libcall that will check overflow. 
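The remaining cases go through a libcall, but the inline checks used above reduce to small scalar identities. A standalone sketch with 32-bit unsigned arithmetic (names illustrative):

#include <cstdint>

// Unsigned add overflows iff the wrapped sum is smaller than an operand.
bool uaddo(uint32_t a, uint32_t b, uint32_t &sum) {
  sum = a + b;
  return sum < a;
}

// Unsigned multiply overflows iff dividing the wrapped product by a nonzero
// operand does not give the other operand back; the expansion above guards
// b == 0 by dividing by 1 instead and forcing the overflow bit to 0.
bool umulo(uint32_t a, uint32_t b, uint32_t &prod) {
  prod = a * b;
  return b != 0 && prod / b != a;
}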
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i32) + LC = RTLIB::MULO_I32; + else if (VT == MVT::i64) + LC = RTLIB::MULO_I64; + else if (VT == MVT::i128) + LC = RTLIB::MULO_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!"); + + SDValue Temp = DAG.CreateStackTemporary(PtrVT); + // Temporary for the overflow value, default it to zero. + SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, + DAG.getConstant(0, dl, PtrVT), Temp, + MachinePointerInfo(), false, false, 0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : N->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + } + + // Also pass the address of the overflow check. + Entry.Node = Temp; + Entry.Ty = PtrTy->getPointerTo(); + Entry.isSExt = true; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(dl).setChain(Chain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) + .setSExtResult(); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + SplitInteger(CallInfo.first, Lo, Hi); + SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, + MachinePointerInfo(), false, false, false, 0); + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, + DAG.getConstant(0, dl, PtrVT), + ISD::SETNE); + // Use the overflow from the libcall everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + +void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(0), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) + LC = RTLIB::UDIV_I32; + else if (VT == MVT::i64) + LC = RTLIB::UDIV_I64; + else if (VT == MVT::i128) + LC = RTLIB::UDIV_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); + + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + + if (TLI.getOperationAction(ISD::UDIVREM, VT) == TargetLowering::Custom) { + SDValue Res = DAG.getNode(ISD::UDIVREM, dl, DAG.getVTList(VT, VT), Ops); + SplitInteger(Res.getValue(1), Lo, Hi); + return; + } + + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) + LC = RTLIB::UREM_I32; + else if (VT == MVT::i64) + LC = RTLIB::UREM_I64; + else if (VT == MVT::i128) + LC = RTLIB::UREM_I128; + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); + + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + SDValue Op = N->getOperand(0); + if (Op.getValueType().bitsLE(NVT)) { + // The low part is zero 
extension of the input (degenerates to a copy). + Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); + Hi = DAG.getConstant(0, dl, NVT); // The high part is just a zero. + } else { + // For example, extension of an i48 to an i64. The operand type necessarily + // promotes to the result type, so will end up being expanded too. + assert(getTypeAction(Op.getValueType()) == + TargetLowering::TypePromoteInteger && + "Only know how to promote this result!"); + SDValue Res = GetPromotedInteger(Op); + assert(Res.getValueType() == N->getValueType(0) && + "Operand over promoted?"); + // Split the promoted operand. This will simplify when it is expanded. + SplitInteger(Res, Lo, Hi); + unsigned ExcessBits = + Op.getValueType().getSizeInBits() - NVT.getSizeInBits(); + Hi = DAG.getZeroExtendInReg(Hi, dl, + EVT::getIntegerVT(*DAG.getContext(), + ExcessBits)); + } +} + +void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(N); + EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); +} + +//===----------------------------------------------------------------------===// +// Integer Operand Expansion +//===----------------------------------------------------------------------===// + +/// ExpandIntegerOperand - This method is called when the specified operand of +/// the specified node is found to need expansion. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need promotion or expansion as well as the specified one. 
+bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Expand integer operand: "; N->dump(&DAG); dbgs() << "\n"); + SDValue Res = SDValue(); + + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: + #ifndef NDEBUG + dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": "; + N->dump(&DAG); dbgs() << "\n"; + #endif + llvm_unreachable("Do not know how to expand this operator's operand!"); + + case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break; + case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break; + case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break; + case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; + case ISD::INSERT_VECTOR_ELT: Res = ExpandOp_INSERT_VECTOR_ELT(N); break; + case ISD::SCALAR_TO_VECTOR: Res = ExpandOp_SCALAR_TO_VECTOR(N); break; + case ISD::SELECT_CC: Res = ExpandIntOp_SELECT_CC(N); break; + case ISD::SETCC: Res = ExpandIntOp_SETCC(N); break; + case ISD::SETCCE: Res = ExpandIntOp_SETCCE(N); break; + case ISD::SINT_TO_FP: Res = ExpandIntOp_SINT_TO_FP(N); break; + case ISD::STORE: Res = ExpandIntOp_STORE(cast<StoreSDNode>(N), OpNo); break; + case ISD::TRUNCATE: Res = ExpandIntOp_TRUNCATE(N); break; + case ISD::UINT_TO_FP: Res = ExpandIntOp_UINT_TO_FP(N); break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: Res = ExpandIntOp_Shift(N); break; + case ISD::RETURNADDR: + case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break; + + case ISD::ATOMIC_STORE: Res = ExpandIntOp_ATOMIC_STORE(N); break; + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// IntegerExpandSetCCOperands - Expand the operands of a comparison. This code +/// is shared among BR_CC, SELECT_CC, and SETCC handlers. +void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, + SDValue &NewRHS, + ISD::CondCode &CCCode, + SDLoc dl) { + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(NewLHS, LHSLo, LHSHi); + GetExpandedInteger(NewRHS, RHSLo, RHSHi); + + if (CCCode == ISD::SETEQ || CCCode == ISD::SETNE) { + if (RHSLo == RHSHi) { + if (ConstantSDNode *RHSCST = dyn_cast<ConstantSDNode>(RHSLo)) { + if (RHSCST->isAllOnesValue()) { + // Equality comparison to -1. + NewLHS = DAG.getNode(ISD::AND, dl, + LHSLo.getValueType(), LHSLo, LHSHi); + NewRHS = RHSLo; + return; + } + } + } + + NewLHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSLo, RHSLo); + NewRHS = DAG.getNode(ISD::XOR, dl, LHSLo.getValueType(), LHSHi, RHSHi); + NewLHS = DAG.getNode(ISD::OR, dl, NewLHS.getValueType(), NewLHS, NewRHS); + NewRHS = DAG.getConstant(0, dl, NewLHS.getValueType()); + return; + } + + // If this is a comparison of the sign bit, just look at the top part. + // X > -1, x < 0 + if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(NewRHS)) + if ((CCCode == ISD::SETLT && CST->isNullValue()) || // X < 0 + (CCCode == ISD::SETGT && CST->isAllOnesValue())) { // X > -1 + NewLHS = LHSHi; + NewRHS = RHSHi; + return; + } + + // FIXME: This generated code sucks. 
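For reference, the EQ/NE fold and the sign-bit shortcut above come down to the following on a 64-bit value kept as two 32-bit halves (standalone sketch, helper names illustrative):

#include <cstdint>

// seteq on an expanded value: OR the XORs of the halves and compare the
// single word against zero.
bool eq64(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi) {
  return ((aLo ^ bLo) | (aHi ^ bHi)) == 0;
}

// "x < 0" only inspects the sign bit, so only the high half matters.
bool isNegative64(uint32_t lo, uint32_t hi) {
  (void)lo;
  return (int32_t)hi < 0;
}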
+ ISD::CondCode LowCC; + switch (CCCode) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETLT: + case ISD::SETULT: LowCC = ISD::SETULT; break; + case ISD::SETGT: + case ISD::SETUGT: LowCC = ISD::SETUGT; break; + case ISD::SETLE: + case ISD::SETULE: LowCC = ISD::SETULE; break; + case ISD::SETGE: + case ISD::SETUGE: LowCC = ISD::SETUGE; break; + } + + // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison + // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands + // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2; + + // NOTE: on targets without efficient SELECT of bools, we can always use + // this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3) + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, + nullptr); + SDValue Tmp1, Tmp2; + if (TLI.isTypeLegal(LHSLo.getValueType()) && + TLI.isTypeLegal(RHSLo.getValueType())) + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); + if (!Tmp1.getNode()) + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), + LHSLo, RHSLo, LowCC); + if (TLI.isTypeLegal(LHSHi.getValueType()) && + TLI.isTypeLegal(RHSHi.getValueType())) + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); + if (!Tmp2.getNode()) + Tmp2 = DAG.getNode(ISD::SETCC, dl, + getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, DAG.getCondCode(CCCode)); + + ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode()); + ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode()); + if ((Tmp1C && Tmp1C->isNullValue()) || + (Tmp2C && Tmp2C->isNullValue() && + (CCCode == ISD::SETLE || CCCode == ISD::SETGE || + CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) || + (Tmp2C && Tmp2C->getAPIntValue() == 1 && + (CCCode == ISD::SETLT || CCCode == ISD::SETGT || + CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) { + // low part is known false, returns high part. + // For LE / GE, if high part is known false, ignore the low part. + // For LT / GT, if high part is known true, ignore the low part. + NewLHS = Tmp2; + NewRHS = SDValue(); + return; + } + + if (LHSHi == RHSHi) { + // Comparing the low bits is enough. + NewLHS = Tmp1; + NewRHS = SDValue(); + return; + } + + // Lower with SETCCE if the target supports it. + // FIXME: Make all targets support this, then remove the other lowering. + if (TLI.getOperationAction( + ISD::SETCCE, + TLI.getTypeToExpandTo(*DAG.getContext(), LHSLo.getValueType())) == + TargetLowering::Custom) { + // SETCCE can detect < and >= directly. For > and <=, flip operands and + // condition code. + bool FlipOperands = false; + switch (CCCode) { + case ISD::SETGT: CCCode = ISD::SETLT; FlipOperands = true; break; + case ISD::SETUGT: CCCode = ISD::SETULT; FlipOperands = true; break; + case ISD::SETLE: CCCode = ISD::SETGE; FlipOperands = true; break; + case ISD::SETULE: CCCode = ISD::SETUGE; FlipOperands = true; break; + default: break; + } + if (FlipOperands) { + std::swap(LHSLo, RHSLo); + std::swap(LHSHi, RHSHi); + } + // Perform a wide subtraction, feeding the carry from the low part into + // SETCCE. The SETCCE operation is essentially looking at the high part of + // the result of LHS - RHS. It is negative iff LHS < RHS. It is zero or + // positive iff LHS >= RHS. 
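For the unsigned orderings the same subtract-and-borrow idea can be written out directly; the signed orderings additionally need the overflow information that SETCCE takes from the target's flags. A standalone sketch (names illustrative):

#include <cstdint>

// Unsigned a < b on 64-bit values held as 32-bit halves: the low-half
// subtraction produces a borrow, which breaks ties in the high halves.
bool ult64(uint32_t aLo, uint32_t aHi, uint32_t bLo, uint32_t bHi) {
  bool borrow = aLo < bLo;                 // borrow out of aLo - bLo
  return aHi < bHi || (aHi == bHi && borrow);
}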
+ SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBC, dl, VTList, LHSLo, RHSLo); + SDValue Res = + DAG.getNode(ISD::SETCCE, dl, getSetCCResultType(LHSLo.getValueType()), + LHSHi, RHSHi, LowCmp.getValue(1), DAG.getCondCode(CCCode)); + NewLHS = Res; + NewRHS = SDValue(); + return; + } + + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ, false, + DagCombineInfo, dl); + if (!NewLHS.getNode()) + NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), + LHSHi, RHSHi, ISD::SETEQ); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); + NewRHS = SDValue(); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(1))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + DAG.getCondCode(CCCode), NewLHS, NewRHS, + N->getOperand(4)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, we need to compare the result + // against zero to select between true and false values. + if (!NewRHS.getNode()) { + NewRHS = DAG.getConstant(0, SDLoc(N), NewLHS.getValueType()); + CCCode = ISD::SETNE; + } + + // Update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + N->getOperand(2), N->getOperand(3), + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { + SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); + ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(2))->get(); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); + + // If ExpandSetCCOperands returned a scalar, use it. + if (!NewRHS.getNode()) { + assert(NewLHS.getValueType() == N->getValueType(0) && + "Unexpected setcc expansion!"); + return NewLHS; + } + + // Otherwise, update N to have the operands specified. + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SETCCE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + SDValue Cond = N->getOperand(3); + SDLoc dl = SDLoc(N); + + SDValue LHSLo, LHSHi, RHSLo, RHSHi; + GetExpandedInteger(LHS, LHSLo, LHSHi); + GetExpandedInteger(RHS, RHSLo, RHSHi); + + // Expand to a SUBE for the low part and a smaller SETCCE for the high. + SDVTList VTList = DAG.getVTList(LHSLo.getValueType(), MVT::Glue); + SDValue LowCmp = DAG.getNode(ISD::SUBE, dl, VTList, LHSLo, RHSLo, Carry); + return DAG.getNode(ISD::SETCCE, dl, N->getValueType(0), LHSHi, RHSHi, + LowCmp.getValue(1), Cond); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { + // The value being shifted is legal, but the shift amount is too big. 
+ // It follows that either the result of the shift is undefined, or the + // upper half of the shift amount is zero. Just use the lower half. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(1), Lo, Hi); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { + // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant. This + // surely makes pretty nice problems on 8/16 bit targets. Just truncate this + // constant to valid type. + SDValue Lo, Hi; + GetExpandedInteger(N->getOperand(0), Lo, Hi); + return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + EVT DstVT = N->getValueType(0); + RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this SINT_TO_FP!"); + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { + if (ISD::isNormalStore(N)) + return ExpandOp_NormalStore(N, OpNo); + + assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + + EVT VT = N->getOperand(1).getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + unsigned Alignment = N->getAlignment(); + bool isVolatile = N->isVolatile(); + bool isNonTemporal = N->isNonTemporal(); + AAMDNodes AAInfo = N->getAAInfo(); + SDLoc dl(N); + SDValue Lo, Hi; + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + if (N->getMemoryVT().bitsLE(NVT)) { + GetExpandedInteger(N->getValue(), Lo, Hi); + return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), + N->getMemoryVT(), isVolatile, isNonTemporal, + Alignment, AAInfo); + } + + if (DAG.getDataLayout().isLittleEndian()) { + // Little-endian - low bits are at low addresses. + GetExpandedInteger(N->getValue(), Lo, Hi); + + Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), + isVolatile, isNonTemporal, Alignment, AAInfo); + + unsigned ExcessBits = + N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); + EVT NEVT = EVT::getIntegerVT(*DAG.getContext(), ExcessBits); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + NEVT, isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize), AAInfo); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + + // Big-endian - high bits are at low addresses. Favor aligned stores at + // the cost of some bit-fiddling. + GetExpandedInteger(N->getValue(), Lo, Hi); + + EVT ExtVT = N->getMemoryVT(); + unsigned EBytes = ExtVT.getStoreSize(); + unsigned IncrementSize = NVT.getSizeInBits()/8; + unsigned ExcessBits = (EBytes - IncrementSize)*8; + EVT HiVT = EVT::getIntegerVT(*DAG.getContext(), + ExtVT.getSizeInBits() - ExcessBits); + + if (ExcessBits < NVT.getSizeInBits()) { + // Transfer high bits from the top of Lo to the bottom of Hi. 
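With concrete numbers: for an i48 stored through i32 registers, ExcessBits is (6 - 4) * 8 = 16, so the word stored first must carry the top 32 bits of the 48-bit value and the trailing halfword gets the bottom 16 bits. A standalone sketch of the bit transfer (names illustrative):

#include <cstdint>

// x is a 48-bit quantity kept as lo = x & 0xFFFFFFFF and hi = x >> 32
// (only the low 16 bits of hi are meaningful). On a big-endian target the
// most significant bytes are stored first.
void bigEndianI48Words(uint32_t lo, uint32_t hi,
                       uint32_t &firstWord, uint16_t &trailingHalf) {
  firstWord = (hi << 16) | (lo >> 16);     // top 32 bits of the 48-bit value
  trailingHalf = (uint16_t)(lo & 0xFFFF);  // the ExcessBits stored second
}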
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi, + DAG.getConstant(NVT.getSizeInBits() - ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode( + ISD::OR, dl, NVT, Hi, + DAG.getNode(ISD::SRL, dl, NVT, Lo, + DAG.getConstant(ExcessBits, dl, + TLI.getPointerTy(DAG.getDataLayout())))); + } + + // Store both the high bits and maybe some of the low bits. + Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), + HiVT, isVolatile, isNonTemporal, Alignment, AAInfo); + + // Increment the pointer to the other half. + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + // Store the lowest ExcessBits bits in the second half. + Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + EVT::getIntegerVT(*DAG.getContext(), ExcessBits), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize), AAInfo); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { + SDValue InL, InH; + GetExpandedInteger(N->getOperand(0), InL, InH); + // Just truncate the low part of the source. + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL); +} + +SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { + SDValue Op = N->getOperand(0); + EVT SrcVT = Op.getValueType(); + EVT DstVT = N->getValueType(0); + SDLoc dl(N); + + // The following optimization is valid only if every value in SrcVT (when + // treated as signed) is representable in DstVT. Check that the mantissa + // size of DstVT is >= than the number of bits in SrcVT -1. + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && + TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ + // Do a signed conversion then adjust the result. + SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); + SignedConv = TLI.LowerOperation(SignedConv, DAG); + + // The result of the signed conversion needs adjusting if the 'sign bit' of + // the incoming integer was set. To handle this, we dynamically test to see + // if it is set, and, if so, add a fudge factor. + + const uint64_t F32TwoE32 = 0x4F800000ULL; + const uint64_t F32TwoE64 = 0x5F800000ULL; + const uint64_t F32TwoE128 = 0x7F800000ULL; + + APInt FF(32, 0); + if (SrcVT == MVT::i32) + FF = APInt(32, F32TwoE32); + else if (SrcVT == MVT::i64) + FF = APInt(32, F32TwoE64); + else if (SrcVT == MVT::i128) + FF = APInt(32, F32TwoE128); + else + llvm_unreachable("Unsupported UINT_TO_FP!"); + + // Check whether the sign bit is set. + SDValue Lo, Hi; + GetExpandedInteger(Op, Lo, Hi); + SDValue SignSet = DAG.getSetCC(dl, + getSetCCResultType(Hi.getValueType()), + Hi, + DAG.getConstant(0, dl, Hi.getValueType()), + ISD::SETLT); + + // Build a 64 bit pair (0, FF) in the constant pool, with FF in the lo bits. + SDValue FudgePtr = + DAG.getConstantPool(ConstantInt::get(*DAG.getContext(), FF.zext(64)), + TLI.getPointerTy(DAG.getDataLayout())); + + // Get a pointer to FF if the sign bit was set, or to 0 otherwise. 
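The whole fudge-factor scheme corresponds to the following scalar computation, with the compensation constant 2^64 written directly instead of being loaded from the constant pool (standalone sketch; double-rounding subtleties of the two-step conversion are ignored):

#include <cstdint>

// uint64 -> double using only a signed conversion: when the top bit of the
// input is set, the signed conversion comes out exactly 2^64 too small.
double u64ToDouble(uint64_t x) {
  double d = (double)(int64_t)x;           // signed conversion (two's complement)
  if ((int64_t)x < 0)                      // "sign bit" of the input was set
    d += 18446744073709551616.0;           // add back 2^64
  return d;
}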
+ SDValue Zero = DAG.getIntPtrConstant(0, dl); + SDValue Four = DAG.getIntPtrConstant(4, dl); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); + unsigned Alignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlignment(); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); + Alignment = std::min(Alignment, 4u); + + // Load the value out, extending it from f32 to the destination float type. + // FIXME: Avoid the extend by constructing the right constant pool? + SDValue Fudge = DAG.getExtLoad( + ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), FudgePtr, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, + false, false, false, Alignment); + return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); + } + + // Otherwise, use a libcall. + RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && + "Don't know how to expand this UINT_TO_FP!"); + return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first; +} + +SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { + SDLoc dl(N); + SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), + N->getOperand(0), + N->getOperand(1), N->getOperand(2), + cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + return Swap.getValue(1); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp0 = N->getOperand(0); + EVT InVT = InOp0.getValueType(); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned OutNumElems = OutVT.getVectorNumElements(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDLoc dl(N); + SDValue BaseIdx = N->getOperand(1); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(OutNumElems); + for (unsigned i = 0; i != OutNumElems; ++i) { + + // Extract the element from the original vector. + SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), + BaseIdx, DAG.getConstant(i, dl, BaseIdx.getValueType())); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + InVT.getVectorElementType(), N->getOperand(0), Index); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext); + // Insert the converted element to the new vector. 
+ Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + EVT VT = N->getValueType(0); + SDLoc dl(N); + + ArrayRef<int> NewMask = SV->getMask().slice(0, VT.getVectorNumElements()); + + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = GetPromotedInteger(N->getOperand(1)); + EVT OutVT = V0.getValueType(); + + return DAG.getVectorShuffle(OutVT, dl, V0, V1, NewMask); +} + + +SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + unsigned NumElems = N->getNumOperands(); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDLoc dl(N); + + SmallVector<SDValue, 8> Ops; + Ops.reserve(NumElems); + for (unsigned i = 0; i != NumElems; ++i) { + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (<v?i1> = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. + if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); + Ops.push_back(Op); + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { + + SDLoc dl(N); + + assert(!N->getOperand(0).getValueType().isVector() && + "Input must be a scalar"); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(0)); + + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { + SDLoc dl(N); + + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT InElemTy = OutVT.getVectorElementType(); + EVT OutElemTy = NOutVT.getVectorElementType(); + + unsigned NumElem = N->getOperand(0).getValueType().getVectorNumElements(); + unsigned NumOutElem = NOutVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + assert(NumElem * NumOperands == NumOutElem && + "Unexpected number of elements"); + + // Take the elements from the first vector. 
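The loop below flattens every operand into a single list of promoted elements, so element j of operand i lands at slot i * NumElem + j of the result. The same layout on ordinary containers (standalone sketch; the cast merely stands in for the ANY_EXTEND):

#include <cstddef>
#include <cstdint>
#include <vector>

// Flatten K inputs of M narrow elements into one list of K * M wide elements.
std::vector<int16_t>
flattenAndExtend(const std::vector<std::vector<int8_t>> &ops) {
  const std::size_t M = ops.front().size();
  std::vector<int16_t> out(ops.size() * M);
  for (std::size_t i = 0; i < ops.size(); ++i)
    for (std::size_t j = 0; j < M; ++j)
      out[i * M + j] = (int16_t)ops[i][j];
  return out;
}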
+ SmallVector<SDValue, 8> Ops(NumOutElem); + for (unsigned i = 0; i < NumOperands; ++i) { + SDValue Op = N->getOperand(i); + for (unsigned j = 0; j < NumElem; ++j) { + SDValue Ext = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InElemTy, Op, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops); +} + +SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + assert(NOutVT.isVector() && "This type must be promoted to a vector type"); + + EVT NOutVTElem = NOutVT.getVectorElementType(); + + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + + SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, + NOutVTElem, N->getOperand(1)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NOutVT, + V0, ConvElem, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, + TLI.getVectorIdxTy(DAG.getDataLayout())); + SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + V0->getValueType(0).getScalarType(), V0, V1); + + // EXTRACT_VECTOR_ELT can return types which are wider than the incoming + // element types. If this is the case then we need to expand the outgoing + // value and not truncate it. + return DAG.getAnyExtOrTrunc(Ext, dl, N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDLoc dl(N); + SDValue V0 = GetPromotedInteger(N->getOperand(0)); + MVT InVT = V0.getValueType().getSimpleVT(); + MVT OutVT = MVT::getVectorVT(InVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OutVT, V0, N->getOperand(1)); + return DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Ext); +} + +SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { + SDLoc dl(N); + unsigned NumElems = N->getNumOperands(); + + EVT RetSclrTy = N->getValueType(0).getVectorElementType(); + + SmallVector<SDValue, 8> NewOps; + NewOps.reserve(NumElems); + + // For each incoming vector + for (unsigned VecIdx = 0; VecIdx != NumElems; ++VecIdx) { + SDValue Incoming = GetPromotedInteger(N->getOperand(VecIdx)); + EVT SclrTy = Incoming->getValueType(0).getVectorElementType(); + unsigned NumElem = Incoming->getValueType(0).getVectorNumElements(); + + for (unsigned i=0; i<NumElem; ++i) { + // Extract element from incoming vector + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, SclrTy, Incoming, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue Tr = DAG.getNode(ISD::TRUNCATE, dl, RetSclrTy, Ex); + NewOps.push_back(Tr); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, dl, N->getValueType(0), NewOps); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp new file mode 100644 index 0000000..2a0b0aa --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -0,0 +1,1184 @@ +//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeTypes method. It transforms +// an arbitrary well-formed SelectionDAG to only consist of legal types. This +// is common code shared among the LegalizeTypes*.cpp files. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +static cl::opt<bool> +EnableExpensiveChecks("enable-legalize-types-checking", cl::Hidden); + +/// PerformExpensiveChecks - Do extensive, expensive, sanity checking. +void DAGTypeLegalizer::PerformExpensiveChecks() { + // If a node is not processed, then none of its values should be mapped by any + // of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + + // If a node is processed, then each value with an illegal type must be mapped + // by exactly one of PromotedIntegers, ExpandedIntegers, ..., ReplacedValues. + // Values with a legal type may be mapped by ReplacedValues, but not by any of + // the other maps. + + // Note that these invariants may not hold momentarily when processing a node: + // the node being processed may be put in a map before being marked Processed. + + // Note that it is possible to have nodes marked NewNode in the DAG. This can + // occur in two ways. Firstly, a node may be created during legalization but + // never passed to the legalization core. This is usually due to the implicit + // folding that occurs when using the DAG.getNode operators. Secondly, a new + // node may be passed to the legalization core, but when analyzed may morph + // into a different node, leaving the original node as a NewNode in the DAG. + // A node may morph if one of its operands changes during analysis. Whether + // it actually morphs or not depends on whether, after updating its operands, + // it is equivalent to an existing node: if so, it morphs into that existing + // node (CSE). An operand can change during analysis if the operand is a new + // node that morphs, or it is a processed value that was mapped to some other + // value (as recorded in ReplacedValues) in which case the operand is turned + // into that other value. If a node morphs then the node it morphed into will + // be used instead of it for legalization, however the original node continues + // to live on in the DAG. + // The conclusion is that though there may be nodes marked NewNode in the DAG, + // all uses of such nodes are also marked NewNode: the result is a fungus of + // NewNodes growing on top of the useful nodes, and perhaps using them, but + // not used by them. + + // If a value is mapped by ReplacedValues, then it must have no uses, except + // by nodes marked NewNode (see above). + + // The final node obtained by mapping by ReplacedValues is not marked NewNode. + // Note that ReplacedValues should be applied iteratively. + + // Note that the ReplacedValues map may also map deleted nodes (by iterating + // over the DAG we never dereference deleted nodes). This means that it may + // also map nodes marked NewNode if the deallocated memory was reallocated as + // another node, and that new node was not seen by the LegalizeTypes machinery + // (for example because it was created but not used). 
In general, we cannot + // distinguish between new nodes and deleted nodes. + SmallVector<SDNode*, 16> NewNodes; + for (SDNode &Node : DAG.allnodes()) { + // Remember nodes marked NewNode - they are subject to extra checking below. + if (Node.getNodeId() == NewNode) + NewNodes.push_back(&Node); + + for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) { + SDValue Res(&Node, i); + bool Failed = false; + + unsigned Mapped = 0; + if (ReplacedValues.find(Res) != ReplacedValues.end()) { + Mapped |= 1; + // Check that remapped values are only used by nodes marked NewNode. + for (SDNode::use_iterator UI = Node.use_begin(), UE = Node.use_end(); + UI != UE; ++UI) + if (UI.getUse().getResNo() == i) + assert(UI->getNodeId() == NewNode && + "Remapped value has non-trivial use!"); + + // Check that the final result of applying ReplacedValues is not + // marked NewNode. + SDValue NewVal = ReplacedValues[Res]; + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(NewVal); + while (I != ReplacedValues.end()) { + NewVal = I->second; + I = ReplacedValues.find(NewVal); + } + assert(NewVal.getNode()->getNodeId() != NewNode && + "ReplacedValues maps to a new node!"); + } + if (PromotedIntegers.find(Res) != PromotedIntegers.end()) + Mapped |= 2; + if (SoftenedFloats.find(Res) != SoftenedFloats.end()) + Mapped |= 4; + if (ScalarizedVectors.find(Res) != ScalarizedVectors.end()) + Mapped |= 8; + if (ExpandedIntegers.find(Res) != ExpandedIntegers.end()) + Mapped |= 16; + if (ExpandedFloats.find(Res) != ExpandedFloats.end()) + Mapped |= 32; + if (SplitVectors.find(Res) != SplitVectors.end()) + Mapped |= 64; + if (WidenedVectors.find(Res) != WidenedVectors.end()) + Mapped |= 128; + + if (Node.getNodeId() != Processed) { + // Since we allow ReplacedValues to map deleted nodes, it may map nodes + // marked NewNode too, since a deleted node may have been reallocated as + // another node that has not been seen by the LegalizeTypes machinery. + if ((Node.getNodeId() == NewNode && Mapped > 1) || + (Node.getNodeId() != NewNode && Mapped != 0)) { + dbgs() << "Unprocessed value in a map!"; + Failed = true; + } + } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) { + if (Mapped > 1) { + dbgs() << "Value with legal type was transformed!"; + Failed = true; + } + } else { + if (Mapped == 0) { + dbgs() << "Processed value not in any map!"; + Failed = true; + } else if (Mapped & (Mapped - 1)) { + dbgs() << "Value in multiple maps!"; + Failed = true; + } + } + + if (Failed) { + if (Mapped & 1) + dbgs() << " ReplacedValues"; + if (Mapped & 2) + dbgs() << " PromotedIntegers"; + if (Mapped & 4) + dbgs() << " SoftenedFloats"; + if (Mapped & 8) + dbgs() << " ScalarizedVectors"; + if (Mapped & 16) + dbgs() << " ExpandedIntegers"; + if (Mapped & 32) + dbgs() << " ExpandedFloats"; + if (Mapped & 64) + dbgs() << " SplitVectors"; + if (Mapped & 128) + dbgs() << " WidenedVectors"; + dbgs() << "\n"; + llvm_unreachable(nullptr); + } + } + } + + // Checked that NewNodes are only used by other NewNodes. + for (unsigned i = 0, e = NewNodes.size(); i != e; ++i) { + SDNode *N = NewNodes[i]; + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) + assert(UI->getNodeId() == NewNode && "NewNode used by non-NewNode!"); + } +} + +/// run - This is the main entry point for the type legalizer. This does a +/// top-down traversal of the dag, legalizing types as it goes. Returns "true" +/// if it made any changes. 
+bool DAGTypeLegalizer::run() { + bool Changed = false; + + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted, and tracking any + // changes of the root. + HandleSDNode Dummy(DAG.getRoot()); + Dummy.setNodeId(Unanalyzed); + + // The root of the dag may dangle to deleted nodes until the type legalizer is + // done. Set it to null to avoid confusion. + DAG.setRoot(SDValue()); + + // Walk all nodes in the graph, assigning them a NodeId of 'ReadyToProcess' + // (and remembering them) if they are leaves and assigning 'Unanalyzed' if + // non-leaves. + for (SDNode &Node : DAG.allnodes()) { + if (Node.getNumOperands() == 0) { + Node.setNodeId(ReadyToProcess); + Worklist.push_back(&Node); + } else { + Node.setNodeId(Unanalyzed); + } + } + + // Now that we have a set of nodes to process, handle them all. + while (!Worklist.empty()) { +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + SDNode *N = Worklist.back(); + Worklist.pop_back(); + assert(N->getNodeId() == ReadyToProcess && + "Node should be ready if on worklist!"); + + if (IgnoreNodeResults(N)) + goto ScanOperands; + + // Scan the values produced by the node, checking to see if any result + // types are illegal. + for (unsigned i = 0, NumResults = N->getNumValues(); i < NumResults; ++i) { + EVT ResultVT = N->getValueType(i); + switch (getTypeAction(ResultVT)) { + case TargetLowering::TypeLegal: + break; + // The following calls must take care of *all* of the node's results, + // not just the illegal result they were passed (this includes results + // with a legal type). Results can be remapped using ReplaceValueWith, + // or their promoted/expanded/etc values registered in PromotedIntegers, + // ExpandedIntegers etc. + case TargetLowering::TypePromoteInteger: + PromoteIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeExpandInteger: + ExpandIntegerResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSoftenFloat: + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. + assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; + case TargetLowering::TypeExpandFloat: + ExpandFloatResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeScalarizeVector: + ScalarizeVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeSplitVector: + SplitVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypeWidenVector: + WidenVectorResult(N, i); + Changed = true; + goto NodeDone; + case TargetLowering::TypePromoteFloat: + PromoteFloatResult(N, i); + Changed = true; + goto NodeDone; + } + } + +ScanOperands: + // Scan the operand list for the node, handling any nodes with operands that + // are illegal. + { + unsigned NumOperands = N->getNumOperands(); + bool NeedsReanalyzing = false; + unsigned i; + for (i = 0; i != NumOperands; ++i) { + if (IgnoreNodeResults(N->getOperand(i).getNode())) + continue; + + EVT OpVT = N->getOperand(i).getValueType(); + switch (getTypeAction(OpVT)) { + case TargetLowering::TypeLegal: + continue; + // The following calls must either replace all of the node's results + // using ReplaceValueWith, and return "false"; or update the node's + // operands in place, and return "true". 
+ case TargetLowering::TypePromoteInteger: + NeedsReanalyzing = PromoteIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandInteger: + NeedsReanalyzing = ExpandIntegerOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSoftenFloat: + NeedsReanalyzing = SoftenFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeExpandFloat: + NeedsReanalyzing = ExpandFloatOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeScalarizeVector: + NeedsReanalyzing = ScalarizeVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeSplitVector: + NeedsReanalyzing = SplitVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypeWidenVector: + NeedsReanalyzing = WidenVectorOperand(N, i); + Changed = true; + break; + case TargetLowering::TypePromoteFloat: + NeedsReanalyzing = PromoteFloatOperand(N, i); + Changed = true; + break; + } + break; + } + + // The sub-method updated N in place. Check to see if any operands are new, + // and if so, mark them. If the node needs revisiting, don't add all users + // to the worklist etc. + if (NeedsReanalyzing) { + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(NewNode); + // Recompute the NodeId and correct processed operands, adding the node to + // the worklist if ready. + SDNode *M = AnalyzeNewNode(N); + if (M == N) + // The node didn't morph - nothing special to do, it will be revisited. + continue; + + // The node morphed - this is equivalent to legalizing by replacing every + // value of N with the corresponding value of M. So do that now. + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); + assert(N->getNodeId() == NewNode && "Unexpected node state!"); + // The node continues to live on as part of the NewNode fungus that + // grows on top of the useful nodes. Nothing more needs to be done + // with it - move on to the next node. + continue; + } + + if (i == NumOperands) { + DEBUG(dbgs() << "Legally typed node: "; N->dump(&DAG); dbgs() << "\n"); + } + } +NodeDone: + + // If we reach here, the node was processed, potentially creating new nodes. + // Mark it as processed and add its users to the worklist as appropriate. + assert(N->getNodeId() == ReadyToProcess && "Node ID recalculated?"); + N->setNodeId(Processed); + + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) { + SDNode *User = *UI; + int NodeId = User->getNodeId(); + + // This node has two options: it can either be a new node or its Node ID + // may be a count of the number of operands it has that are not ready. + if (NodeId > 0) { + User->setNodeId(NodeId-1); + + // If this was the last use it was waiting on, add it to the ready list. + if (NodeId-1 == ReadyToProcess) + Worklist.push_back(User); + continue; + } + + // If this is an unreachable new node, then ignore it. If it ever becomes + // reachable by being used by a newly created node then it will be handled + // by AnalyzeNewNode. + if (NodeId == NewNode) + continue; + + // Otherwise, this node is new: this is the first operand of it that + // became ready. Its new NodeId is the number of operands it has minus 1 + // (as this node is now processed). 
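Stripped of the DAG specifics, this NodeId bookkeeping is the in-degree countdown of a topological sort: each node waits on a counter of not-yet-processed operands and joins the worklist when that counter reaches zero. A generic standalone sketch (names illustrative):

#include <cstddef>
#include <vector>

// users[n] lists the nodes that use n; pendingOperands[n] counts n's
// operands that have not been processed yet (the analogue of a positive
// NodeId). Nodes are visited only after all of their operands.
void processInOperandOrder(const std::vector<std::vector<std::size_t>> &users,
                           std::vector<std::size_t> pendingOperands) {
  std::vector<std::size_t> worklist;
  for (std::size_t n = 0; n < pendingOperands.size(); ++n)
    if (pendingOperands[n] == 0)           // leaves are ready immediately
      worklist.push_back(n);

  while (!worklist.empty()) {
    std::size_t n = worklist.back();
    worklist.pop_back();
    // ... process node n here ...
    for (std::size_t u : users[n])         // one fewer pending operand per user
      if (--pendingOperands[u] == 0)
        worklist.push_back(u);
  }
}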
+ assert(NodeId == Unanalyzed && "Unknown node ID!"); + User->setNodeId(User->getNumOperands() - 1); + + // If the node only has a single operand, it is now ready. + if (User->getNumOperands() == 1) + Worklist.push_back(User); + } + } + +#ifndef XDEBUG + if (EnableExpensiveChecks) +#endif + PerformExpensiveChecks(); + + // If the root changed (e.g. it was a dead load) update the root. + DAG.setRoot(Dummy.getValue()); + + // Remove dead nodes. This is important to do for cleanliness but also before + // the checking loop below. Implicit folding by the DAG.getNode operators and + // node morphing can cause unreachable nodes to be around with their flags set + // to new. + DAG.RemoveDeadNodes(); + + // In a debug build, scan all the nodes to make sure we found them all. This + // ensures that there are no cycles and that everything got processed. +#ifndef NDEBUG + for (SDNode &Node : DAG.allnodes()) { + bool Failed = false; + + // Check that all result types are legal. + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. + if (!IgnoreNodeResults(&Node)) + for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); + Failed = true; + } + + // Check that all operand types are legal. + for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) + if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); + Failed = true; + } + + if (Node.getNodeId() != Processed) { + if (Node.getNodeId() == NewNode) + dbgs() << "New node not analyzed?\n"; + else if (Node.getNodeId() == Unanalyzed) + dbgs() << "Unanalyzed node not noticed?\n"; + else if (Node.getNodeId() > 0) + dbgs() << "Operand not processed?\n"; + else if (Node.getNodeId() == ReadyToProcess) + dbgs() << "Not added to worklist?\n"; + Failed = true; + } + + if (Failed) { + Node.dump(&DAG); dbgs() << "\n"; + llvm_unreachable(nullptr); + } + } +#endif + + return Changed; +} + +/// AnalyzeNewNode - The specified node is the root of a subtree of potentially +/// new nodes. Correct any processed operands (this may change the node) and +/// calculate the NodeId. If the node itself changes to a processed node, it +/// is not remapped - the caller needs to take care of this. +/// Returns the potentially changed node. +SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { + // If this was an existing node that is already done, we're done. + if (N->getNodeId() != NewNode && N->getNodeId() != Unanalyzed) + return N; + + // Remove any stale map entries. + ExpungeNode(N); + + // Okay, we know that this node is new. Recursively walk all of its operands + // to see if they are new also. The depth of this walk is bounded by the size + // of the new tree that was constructed (usually 2-3 nodes), so we don't worry + // about revisiting of nodes. + // + // As we walk the operands, keep track of the number of nodes that are + // processed. If non-zero, this will become the new nodeid of this node. 
+ // Operands may morph when they are analyzed. If so, the node will be + // updated after all operands have been analyzed. Since this is rare, + // the code tries to minimize overhead in the non-morphing case. + + SmallVector<SDValue, 8> NewOps; + unsigned NumProcessed = 0; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDValue OrigOp = N->getOperand(i); + SDValue Op = OrigOp; + + AnalyzeNewValue(Op); // Op may morph. + + if (Op.getNode()->getNodeId() == Processed) + ++NumProcessed; + + if (!NewOps.empty()) { + // Some previous operand changed. Add this one to the list. + NewOps.push_back(Op); + } else if (Op != OrigOp) { + // This is the first operand to change - add all operands so far. + NewOps.append(N->op_begin(), N->op_begin() + i); + NewOps.push_back(Op); + } + } + + // Some operands changed - update the node. + if (!NewOps.empty()) { + SDNode *M = DAG.UpdateNodeOperands(N, NewOps); + if (M != N) { + // The node morphed into a different node. Normally for this to happen + // the original node would have to be marked NewNode. However this can + // in theory momentarily not be the case while ReplaceValueWith is doing + // its stuff. Mark the original node NewNode to help sanity checking. + N->setNodeId(NewNode); + if (M->getNodeId() != NewNode && M->getNodeId() != Unanalyzed) + // It morphed into a previously analyzed node - nothing more to do. + return M; + + // It morphed into a different new node. Do the equivalent of passing + // it to AnalyzeNewNode: expunge it and calculate the NodeId. No need + // to remap the operands, since they are the same as the operands we + // remapped above. + N = M; + ExpungeNode(N); + } + } + + // Calculate the NodeId. + N->setNodeId(N->getNumOperands() - NumProcessed); + if (N->getNodeId() == ReadyToProcess) + Worklist.push_back(N); + + return N; +} + +/// AnalyzeNewValue - Call AnalyzeNewNode, updating the node in Val if needed. +/// If the node changes to a processed node, then remap it. +void DAGTypeLegalizer::AnalyzeNewValue(SDValue &Val) { + Val.setNode(AnalyzeNewNode(Val.getNode())); + if (Val.getNode()->getNodeId() == Processed) + // We were passed a processed node, or it morphed into one - remap it. + RemapValue(Val); +} + +/// ExpungeNode - If N has a bogus mapping in ReplacedValues, eliminate it. +/// This can occur when a node is deleted then reallocated as a new node - +/// the mapping in ReplacedValues applies to the deleted node, not the new +/// one. +/// The only map that can have a deleted node as a source is ReplacedValues. +/// Other maps can have deleted nodes as targets, but since their looked-up +/// values are always immediately remapped using RemapValue, resulting in a +/// not-deleted node, this is harmless as long as ReplacedValues/RemapValue +/// always performs correct mappings. In order to keep the mapping correct, +/// ExpungeNode should be called on any new nodes *before* adding them as +/// either source or target to ReplacedValues (which typically means calling +/// Expunge when a new node is first seen, since it may no longer be marked +/// NewNode by the time it is added to ReplacedValues). +void DAGTypeLegalizer::ExpungeNode(SDNode *N) { + if (N->getNodeId() != NewNode) + return; + + // If N is not remapped by ReplacedValues then there is nothing to do. + unsigned i, e; + for (i = 0, e = N->getNumValues(); i != e; ++i) + if (ReplacedValues.find(SDValue(N, i)) != ReplacedValues.end()) + break; + + if (i == e) + return; + + // Remove N from all maps - this is expensive but rare. 
+ + for (DenseMap<SDValue, SDValue>::iterator I = PromotedIntegers.begin(), + E = PromotedIntegers.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = SoftenedFloats.begin(), + E = SoftenedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ScalarizedVectors.begin(), + E = ScalarizedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = WidenedVectors.begin(), + E = WidenedVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedIntegers.begin(), E = ExpandedIntegers.end(); I != E; ++I){ + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = ExpandedFloats.begin(), E = ExpandedFloats.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, std::pair<SDValue, SDValue> >::iterator + I = SplitVectors.begin(), E = SplitVectors.end(); I != E; ++I) { + assert(I->first.getNode() != N); + RemapValue(I->second.first); + RemapValue(I->second.second); + } + + for (DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.begin(), + E = ReplacedValues.end(); I != E; ++I) + RemapValue(I->second); + + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + ReplacedValues.erase(SDValue(N, i)); +} + +/// RemapValue - If the specified value was already legalized to another value, +/// replace it by that value. +void DAGTypeLegalizer::RemapValue(SDValue &N) { + DenseMap<SDValue, SDValue>::iterator I = ReplacedValues.find(N); + if (I != ReplacedValues.end()) { + // Use path compression to speed up future lookups if values get multiply + // replaced with other values. + RemapValue(I->second); + N = I->second; + + // Note that it is possible to have N.getNode()->getNodeId() == NewNode at + // this point because it is possible for a node to be put in the map before + // being processed. + } +} + +namespace { + /// NodeUpdateListener - This class is a DAGUpdateListener that listens for + /// updates to nodes and recomputes their ready state. + class NodeUpdateListener : public SelectionDAG::DAGUpdateListener { + DAGTypeLegalizer &DTL; + SmallSetVector<SDNode*, 16> &NodesToAnalyze; + public: + explicit NodeUpdateListener(DAGTypeLegalizer &dtl, + SmallSetVector<SDNode*, 16> &nta) + : SelectionDAG::DAGUpdateListener(dtl.getDAG()), + DTL(dtl), NodesToAnalyze(nta) {} + + void NodeDeleted(SDNode *N, SDNode *E) override { + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + // It is possible, though rare, for the deleted node N to occur as a + // target in a map, so note the replacement N -> E in ReplacedValues. + assert(E && "Node not replaced?"); + DTL.NoteDeletion(N, E); + + // In theory the deleted node could also have been scheduled for analysis. + // So remove it from the set of nodes which will be analyzed. + NodesToAnalyze.remove(N); + + // In general nothing needs to be done for E, since it didn't change but + // only gained new uses. 
However N -> E was just added to ReplacedValues, + // and the result of a ReplacedValues mapping is not allowed to be marked + // NewNode. So if E is marked NewNode, then it needs to be analyzed. + if (E->getNodeId() == DAGTypeLegalizer::NewNode) + NodesToAnalyze.insert(E); + } + + void NodeUpdated(SDNode *N) override { + // Node updates can mean pretty much anything. It is possible that an + // operand was set to something already processed (f.e.) in which case + // this node could become ready. Recompute its flags. + assert(N->getNodeId() != DAGTypeLegalizer::ReadyToProcess && + N->getNodeId() != DAGTypeLegalizer::Processed && + "Invalid node ID for RAUW deletion!"); + N->setNodeId(DAGTypeLegalizer::NewNode); + NodesToAnalyze.insert(N); + } + }; +} + + +/// ReplaceValueWith - The specified value was legalized to the specified other +/// value. Update the DAG and NodeIds replacing any uses of From to use To +/// instead. +void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { + assert(From.getNode() != To.getNode() && "Potential legalization loop!"); + + // If expansion produced new nodes, make sure they are properly marked. + ExpungeNode(From.getNode()); + AnalyzeNewValue(To); // Expunges To. + + // Anything that used the old node should now use the new one. Note that this + // can potentially cause recursive merging. + SmallSetVector<SDNode*, 16> NodesToAnalyze; + NodeUpdateListener NUL(*this, NodesToAnalyze); + do { + DAG.ReplaceAllUsesOfValueWith(From, To); + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe + // to skip. Note that this is not a morphing node - otherwise it would + // still be marked NewNode. + continue; + + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal); + // OldVal may be a target of the ReplacedValues map which was marked + // NewNode to force reanalysis because it was updated. Ensure that + // anything that ReplacedValues mapped to OldVal will now be mapped + // all the way to NewVal. + ReplacedValues[OldVal] = NewVal; + } + // The original node continues to exist in the DAG, marked NewNode. + } + } + // When recursively update nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. 
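+    // That possibility is why the replacement runs in a do/while loop keyed on
+    // From.use_empty() rather than relying on a single RAUW call.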
+ } while (!From.use_empty()); +} + +void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted integer"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedIntegers[Op]; + assert(!OpEntry.getNode() && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Invalid type for softened float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = SoftenedFloats[Op]; + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetPromotedFloat(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for promoted float"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = PromotedFloats[Op]; + assert(!OpEntry.getNode() && "Node is already promoted!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { + // Note that in some cases vector operation operands may be greater than + // the vector element type. For example BUILD_VECTOR of type <1 x i1> with + // a constant i8 operand. + assert(Result.getValueType().getSizeInBits() >= + Op.getValueType().getVectorElementType().getSizeInBits() && + "Invalid type for scalarized vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = ScalarizedVectors[Op]; + assert(!OpEntry.getNode() && "Node is already scalarized!"); + OpEntry = Result; +} + +void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded integer"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. 
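+  // The entry is keyed on the original wide value; GetExpandedInteger looks it
+  // up later and remaps the halves in case they were subsequently replaced.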
+ std::pair<SDValue, SDValue> &Entry = ExpandedIntegers[Op]; + assert(!Entry.first.getNode() && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't expanded"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for expanded float"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = ExpandedFloats[Op]; + assert(!Entry.first.getNode() && "Node already expanded"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, + SDValue &Hi) { + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + RemapValue(Entry.first); + RemapValue(Entry.second); + assert(Entry.first.getNode() && "Operand isn't split"); + Lo = Entry.first; + Hi = Entry.second; +} + +void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, + SDValue Hi) { + assert(Lo.getValueType().getVectorElementType() == + Op.getValueType().getVectorElementType() && + 2*Lo.getValueType().getVectorNumElements() == + Op.getValueType().getVectorNumElements() && + Hi.getValueType() == Lo.getValueType() && + "Invalid type for split vector"); + // Lo/Hi may have been newly allocated, if so, add nodeid's as relevant. + AnalyzeNewValue(Lo); + AnalyzeNewValue(Hi); + + // Remember that this is the result of the node. + std::pair<SDValue, SDValue> &Entry = SplitVectors[Op]; + assert(!Entry.first.getNode() && "Node already split"); + Entry.first = Lo; + Entry.second = Hi; +} + +void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { + assert(Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + "Invalid type for widened vector"); + AnalyzeNewValue(Result); + + SDValue &OpEntry = WidenedVectors[Op]; + assert(!OpEntry.getNode() && "Node already widened!"); + OpEntry = Result; +} + + +//===----------------------------------------------------------------------===// +// Utilities. +//===----------------------------------------------------------------------===// + +/// BitConvertToInteger - Convert to an integer of the same size. +SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) { + unsigned BitWidth = Op.getValueType().getSizeInBits(); + return DAG.getNode(ISD::BITCAST, SDLoc(Op), + EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op); +} + +/// BitConvertVectorToIntegerVector - Convert to a vector of integers of the +/// same size. 
+SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) { + assert(Op.getValueType().isVector() && "Only applies to vectors!"); + unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits(); + EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth); + unsigned NumElts = Op.getValueType().getVectorNumElements(); + return DAG.getNode(ISD::BITCAST, SDLoc(Op), + EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op); +} + +SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, + EVT DestVT) { + SDLoc dl(Op); + // Create the stack frame object. Make sure it is aligned for both + // the source and destination types. + SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT); + // Emit a store to the stack slot. + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, + MachinePointerInfo(), false, false, 0); + // Result is a load from the stack slot. + return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); +} + +/// CustomLowerNode - Replace the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +/// The last parameter is FALSE if we are dealing with a node with legal +/// result types and illegal operand. The second parameter denotes the type of +/// illegal OperandNo in that case. +/// The last parameter being TRUE means we are dealing with a +/// node with illegal result types. The second parameter denotes the type of +/// illegal ResNo in that case. +bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + if (LegalizeResult) + TLI.ReplaceNodeResults(N, Results, DAG); + else + TLI.LowerOperationWrapper(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom lower it after all. + return false; + + // When called from DAGTypeLegalizer::ExpandIntegerResult, we might need to + // provide the same kind of custom splitting behavior. + if (Results.size() == N->getNumValues() + 1 && LegalizeResult) { + // We've legalized a return type by splitting it. If there is a chain, + // replace that too. + SetExpandedInteger(SDValue(N, 0), Results[0], Results[1]); + if (N->getNumValues() > 1) + ReplaceValueWith(SDValue(N, 1), Results[2]); + return true; + } + + // Make everything that once used N's values now use those in Results instead. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) { + ReplaceValueWith(SDValue(N, i), Results[i]); + } + return true; +} + + +/// CustomWidenLowerNode - Widen the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector<SDValue, 8> Results; + TLI.ReplaceNodeResults(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom widen lower its result after all. + return false; + + // Update the widening map. 
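+  // Each value returned by the target becomes the widened form of the
+  // corresponding result of N, so later GetWidenedVector calls will find it.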
+ assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + SetWidenedVector(SDValue(N, i), Results[i]); + return true; +} + +SDValue DAGTypeLegalizer::DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo) { + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + if (i != ResNo) + ReplaceValueWith(SDValue(N, i), SDValue(N->getOperand(i))); + return SDValue(N->getOperand(ResNo)); +} + +/// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and +/// high parts of the given value. +void DAGTypeLegalizer::GetPairElements(SDValue Pair, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(Pair); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(0, dl)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, + DAG.getIntPtrConstant(1, dl)); +} + +SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, + SDValue Index) { + SDLoc dl(Index); + // Make sure the index type is big enough to compute in. + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy(DAG.getDataLayout())); + + // Calculate the element offset and add it to the pointer. + unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. + assert(EltSize * 8 == EltVT.getSizeInBits() && + "Converting bits to bytes lost precision"); + + Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(), Index, + DAG.getConstant(EltSize, dl, Index.getValueType())); + return DAG.getNode(ISD::ADD, dl, Index.getValueType(), Index, VecPtr); +} + +/// JoinIntegers - Build an integer with low bits Lo and high bits Hi. +SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { + // Arbitrarily use dlHi for result SDLoc + SDLoc dlHi(Hi); + SDLoc dlLo(Lo); + EVT LVT = Lo.getValueType(); + EVT HVT = Hi.getValueType(); + EVT NVT = EVT::getIntegerVT(*DAG.getContext(), + LVT.getSizeInBits() + HVT.getSizeInBits()); + + Lo = DAG.getNode(ISD::ZERO_EXTEND, dlLo, NVT, Lo); + Hi = DAG.getNode(ISD::ANY_EXTEND, dlHi, NVT, Hi); + Hi = DAG.getNode(ISD::SHL, dlHi, NVT, Hi, + DAG.getConstant(LVT.getSizeInBits(), dlHi, + TLI.getPointerTy(DAG.getDataLayout()))); + return DAG.getNode(ISD::OR, dlHi, NVT, Lo, Hi); +} + +/// LibCallify - Convert the node into a libcall with the same prototype. +SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, + bool isSigned) { + unsigned NumOps = N->getNumOperands(); + SDLoc dl(N); + if (NumOps == 0) { + return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned, + dl).first; + } else if (NumOps == 1) { + SDValue Op = N->getOperand(0); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned, + dl).first; + } else if (NumOps == 2) { + SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, + dl).first; + } + SmallVector<SDValue, 8> Ops(NumOps); + for (unsigned i = 0; i < NumOps; ++i) + Ops[i] = N->getOperand(i); + + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first; +} + +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. 
+std::pair<SDValue, SDValue> +DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy(DAG.getDataLayout())); + + Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setSExtResult(isSigned).setZExtResult(!isSigned); + + std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); + + return CallInfo; +} + +/// PromoteTargetBoolean - Promote the given target boolean to a target boolean +/// of the given type. A target boolean is an integer value, not necessarily of +/// type i1, the bits of which conform to getBooleanContents. +/// +/// ValVT is the type of values that produced the boolean. +SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { + SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); +} + +/// WidenTargetBoolean - Widen the given target boolean to a target boolean +/// of the given type. The boolean vector is widened and then promoted to match +/// the target boolean type of the given ValVT. +SDValue DAGTypeLegalizer::WidenTargetBoolean(SDValue Bool, EVT ValVT, + bool WithZeroes) { + SDLoc dl(Bool); + EVT BoolVT = Bool.getValueType(); + + assert(ValVT.getVectorNumElements() > BoolVT.getVectorNumElements() && + TLI.isTypeLegal(ValVT) && + "Unexpected types in WidenTargetBoolean"); + EVT WideVT = EVT::getVectorVT(*DAG.getContext(), BoolVT.getScalarType(), + ValVT.getVectorNumElements()); + Bool = ModifyToType(Bool, WideVT, WithZeroes); + return PromoteTargetBoolean(Bool, ValVT); +} + +/// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT +/// bits in Hi. +void DAGTypeLegalizer::SplitInteger(SDValue Op, + EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi) { + SDLoc dl(Op); + assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == + Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); + Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); + Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op, + DAG.getConstant(LoVT.getSizeInBits(), dl, + TLI.getPointerTy(DAG.getDataLayout()))); + Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); +} + +/// SplitInteger - Return the lower and upper halves of Op's bits in a value +/// type half the size of Op's. 
+void DAGTypeLegalizer::SplitInteger(SDValue Op, + SDValue &Lo, SDValue &Hi) { + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), + Op.getValueType().getSizeInBits()/2); + SplitInteger(Op, HalfVT, HalfVT, Lo, Hi); +} + + +//===----------------------------------------------------------------------===// +// Entry Point +//===----------------------------------------------------------------------===// + +/// LegalizeTypes - This transforms the SelectionDAG into a SelectionDAG that +/// only uses types natively supported by the target. Returns "true" if it made +/// any changes. +/// +/// Note that this is an involved process that may invalidate pointers into +/// the graph. +bool SelectionDAG::LegalizeTypes() { + return DAGTypeLegalizer(*this).run(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h new file mode 100644 index 0000000..8ba19f7 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -0,0 +1,864 @@ +//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DAGTypeLegalizer class. This is a private interface +// shared between the code that implements the SelectionDAG::LegalizeTypes +// method. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_LEGALIZETYPES_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" + +namespace llvm { + +//===----------------------------------------------------------------------===// +/// DAGTypeLegalizer - This takes an arbitrary SelectionDAG as input and hacks +/// on it until only value types the target machine can handle are left. This +/// involves promoting small sizes to large sizes or splitting up large values +/// into small values. +/// +class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { + const TargetLowering &TLI; + SelectionDAG &DAG; +public: + // NodeIdFlags - This pass uses the NodeId on the SDNodes to hold information + // about the state of the node. The enum has all the values. + enum NodeIdFlags { + /// ReadyToProcess - All operands have been processed, so this node is ready + /// to be handled. + ReadyToProcess = 0, + + /// NewNode - This is a new node, not before seen, that was created in the + /// process of legalizing some other node. + NewNode = -1, + + /// Unanalyzed - This node's ID needs to be set to the number of its + /// unprocessed operands. + Unanalyzed = -2, + + /// Processed - This is a node that has already been processed. + Processed = -3 + + // 1+ - This is a node which has this many unprocessed operands. + }; +private: + + /// ValueTypeActions - This is a bitvector that contains two bits for each + /// simple value type, where the two bits correspond to the LegalizeAction + /// enum from TargetLowering. This can be queried with "getTypeAction(VT)". + TargetLowering::ValueTypeActionImpl ValueTypeActions; + + /// getTypeAction - Return how we should legalize values of this type. 
+ TargetLowering::LegalizeTypeAction getTypeAction(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT); + } + + /// isTypeLegal - Return true if this type is legal on this target. + bool isTypeLegal(EVT VT) const { + return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; + } + + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128, should to be legally in registers + /// and only some operations converted to library calls or integer + /// bitwise operations. + bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + } + + /// IgnoreNodeResults - Pretend all of this node's results are legal. + bool IgnoreNodeResults(SDNode *N) const { + return N->getOpcode() == ISD::TargetConstant; + } + + /// PromotedIntegers - For integer nodes that are below legal width, this map + /// indicates what promoted value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedIntegers; + + /// ExpandedIntegers - For integer nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedIntegers; + + /// SoftenedFloats - For floating point nodes converted to integers of + /// the same size, this map indicates the converted value to use. + SmallDenseMap<SDValue, SDValue, 8> SoftenedFloats; + + /// PromotedFloats - For floating point nodes that have a smaller precision + /// than the smallest supported precision, this map indicates what promoted + /// value to use. + SmallDenseMap<SDValue, SDValue, 8> PromotedFloats; + + /// ExpandedFloats - For float nodes that need to be expanded this map + /// indicates which operands are the expanded version of the input. + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> ExpandedFloats; + + /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the + /// scalar value of type 'ty' to use. + SmallDenseMap<SDValue, SDValue, 8> ScalarizedVectors; + + /// SplitVectors - For nodes that need to be split this map indicates + /// which operands are the expanded version of the input. + SmallDenseMap<SDValue, std::pair<SDValue, SDValue>, 8> SplitVectors; + + /// WidenedVectors - For vector nodes that need to be widened, indicates + /// the widened value to use. + SmallDenseMap<SDValue, SDValue, 8> WidenedVectors; + + /// ReplacedValues - For values that have been replaced with another, + /// indicates the replacement value to use. + SmallDenseMap<SDValue, SDValue, 8> ReplacedValues; + + /// Worklist - This defines a worklist of nodes to process. In order to be + /// pushed onto this worklist, all operands of a node must have already been + /// processed. + SmallVector<SDNode*, 128> Worklist; + +public: + explicit DAGTypeLegalizer(SelectionDAG &dag) + : TLI(dag.getTargetLoweringInfo()), DAG(dag), + ValueTypeActions(TLI.getValueTypeActions()) { + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); + } + + /// run - This is the main entry point for the type legalizer. 
This does a + /// top-down traversal of the dag, legalizing types as it goes. Returns + /// "true" if it made any changes. + bool run(); + + void NoteDeletion(SDNode *Old, SDNode *New) { + ExpungeNode(Old); + ExpungeNode(New); + for (unsigned i = 0, e = Old->getNumValues(); i != e; ++i) + ReplacedValues[SDValue(Old, i)] = SDValue(New, i); + } + + SelectionDAG &getDAG() const { return DAG; } + +private: + SDNode *AnalyzeNewNode(SDNode *N); + void AnalyzeNewValue(SDValue &Val); + void ExpungeNode(SDNode *N); + void PerformExpensiveChecks(); + void RemapValue(SDValue &N); + + // Common routines. + SDValue BitConvertToInteger(SDValue Op); + SDValue BitConvertVectorToIntegerVector(SDValue Op); + SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); + bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + bool CustomWidenLowerNode(SDNode *N, EVT VT); + + /// DisintegrateMERGE_VALUES - Replace each result of the given MERGE_VALUES + /// node with the corresponding input operand, except for the result 'ResNo', + /// for which the corresponding input operand is returned. + SDValue DisintegrateMERGE_VALUES(SDNode *N, unsigned ResNo); + + SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); + SDValue JoinIntegers(SDValue Lo, SDValue Hi); + SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); + + std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); + std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); + + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); + + /// Modify Bit Vector to match SetCC result type of ValVT. + /// The bit vector is widened with zeroes when WithZeroes is true. + SDValue WidenTargetBoolean(SDValue Bool, EVT ValVT, bool WithZeroes = false); + + void ReplaceValueWith(SDValue From, SDValue To); + void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, + SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Integer Promotion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetPromotedInteger - Given a processed operand Op which was promoted to a + /// larger integer type, this returns the promoted value. The low bits of the + /// promoted value corresponding to the original type are exactly equal to Op. + /// The extra bits contain rubbish, so the promoted value may need to be zero- + /// or sign-extended from the original type before it is usable (the helpers + /// SExtPromotedInteger and ZExtPromotedInteger can do this for you). + /// For example, if Op is an i16 and was promoted to an i32, then this method + /// returns an i32, the lower 16 bits of which coincide with Op, and the upper + /// 16 bits of which contain rubbish. + SDValue GetPromotedInteger(SDValue Op) { + SDValue &PromotedOp = PromotedIntegers[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedInteger(SDValue Op, SDValue Result); + + /// SExtPromotedInteger - Get a promoted operand and sign extend it to the + /// final size. + SDValue SExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); + } + + /// ZExtPromotedInteger - Get a promoted operand and zero extend it to the + /// final size. 
+ SDValue ZExtPromotedInteger(SDValue Op) { + EVT OldVT = Op.getValueType(); + SDLoc dl(Op); + Op = GetPromotedInteger(Op); + return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); + } + + // Integer Result Promotion. + void PromoteIntegerResult(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_AssertSext(SDNode *N); + SDValue PromoteIntRes_AssertZext(SDNode *N); + SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); + SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); + SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); + SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntRes_BITCAST(SDNode *N); + SDValue PromoteIntRes_BSWAP(SDNode *N); + SDValue PromoteIntRes_BITREVERSE(SDNode *N); + SDValue PromoteIntRes_BUILD_PAIR(SDNode *N); + SDValue PromoteIntRes_Constant(SDNode *N); + SDValue PromoteIntRes_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntRes_CTLZ(SDNode *N); + SDValue PromoteIntRes_CTPOP(SDNode *N); + SDValue PromoteIntRes_CTTZ(SDNode *N); + SDValue PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntRes_FP_TO_XINT(SDNode *N); + SDValue PromoteIntRes_FP_TO_FP16(SDNode *N); + SDValue PromoteIntRes_INT_EXTEND(SDNode *N); + SDValue PromoteIntRes_LOAD(LoadSDNode *N); + SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); + SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); + SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_SELECT(SDNode *N); + SDValue PromoteIntRes_VSELECT(SDNode *N); + SDValue PromoteIntRes_SELECT_CC(SDNode *N); + SDValue PromoteIntRes_SETCC(SDNode *N); + SDValue PromoteIntRes_SHL(SDNode *N); + SDValue PromoteIntRes_SimpleIntBinOp(SDNode *N); + SDValue PromoteIntRes_ZExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SExtIntBinOp(SDNode *N); + SDValue PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N); + SDValue PromoteIntRes_SRA(SDNode *N); + SDValue PromoteIntRes_SRL(SDNode *N); + SDValue PromoteIntRes_TRUNCATE(SDNode *N); + SDValue PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo); + SDValue PromoteIntRes_UNDEF(SDNode *N); + SDValue PromoteIntRes_VAARG(SDNode *N); + SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo); + + // Integer Operand Promotion. 
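+  // These routines handle nodes whose result types are legal but which have an
+  // operand that needs to be promoted; each one rebuilds the node using the
+  // promoted form of that operand.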
+ bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue PromoteIntOp_ANY_EXTEND(SDNode *N); + SDValue PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N); + SDValue PromoteIntOp_BITCAST(SDNode *N); + SDValue PromoteIntOp_BUILD_PAIR(SDNode *N); + SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_BUILD_VECTOR(SDNode *N); + SDValue PromoteIntOp_CONVERT_RNDSAT(SDNode *N); + SDValue PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N); + SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N); + SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo); + SDValue PromoteIntOp_Shift(SDNode *N); + SDValue PromoteIntOp_SIGN_EXTEND(SDNode *N); + SDValue PromoteIntOp_SINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_TRUNCATE(SDNode *N); + SDValue PromoteIntOp_UINT_TO_FP(SDNode *N); + SDValue PromoteIntOp_ZERO_EXTEND(SDNode *N); + SDValue PromoteIntOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MLOAD(MaskedLoadSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue PromoteIntOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + + void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code); + + //===--------------------------------------------------------------------===// + // Integer Expansion Support: LegalizeIntegerTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedInteger - Given a processed operand Op which was expanded into + /// two integers of half the size, this returns the two halves. The low bits + /// of Op are exactly equal to the bits of Lo; the high bits exactly equal Hi. + /// For example, if Op is an i64 which was expanded into two i32's, then this + /// method returns the two i32's, with Lo being equal to the lower 32 bits of + /// Op, and Hi being equal to the upper 32 bits. + void GetExpandedInteger(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedInteger(SDValue Op, SDValue Lo, SDValue Hi); + + // Integer Result Expansion. 
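+  // Each ExpandIntRes_* routine below produces the Lo/Hi halves of the result
+  // for one opcode; the halves are recorded with SetExpandedInteger and later
+  // retrieved with GetExpandedInteger.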
+ void ExpandIntegerResult(SDNode *N, unsigned ResNo); + void ExpandIntRes_ANY_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_LOAD (LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_READCYCLECOUNTER (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_ADDSUBE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BITREVERSE (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_BSWAP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_MUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + + void ExpandShiftByConstant(SDNode *N, const APInt &Amt, + SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + bool ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); + + // Integer Operand Expansion. + bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandIntOp_BR_CC(SDNode *N); + SDValue ExpandIntOp_SELECT_CC(SDNode *N); + SDValue ExpandIntOp_SETCC(SDNode *N); + SDValue ExpandIntOp_SETCCE(SDNode *N); + SDValue ExpandIntOp_Shift(SDNode *N); + SDValue ExpandIntOp_SINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ExpandIntOp_TRUNCATE(SDNode *N); + SDValue ExpandIntOp_UINT_TO_FP(SDNode *N); + SDValue ExpandIntOp_RETURNADDR(SDNode *N); + SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); + + void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, SDLoc dl); + + //===--------------------------------------------------------------------===// + // Float to Integer Conversion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. 
+ /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. + SDValue GetSoftenedFloat(SDValue Op) { + SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; + RemapValue(SoftenedOp); + assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); + return SoftenedOp; + } + void SetSoftenedFloat(SDValue Op, SDValue Result); + + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. + void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort in + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. + bool SoftenFloatResult(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FMINNUM(SDNode *N); + SDValue SoftenFloatRes_FMAXNUM(SDNode *N); + SDValue SoftenFloatRes_FADD(SDNode *N); + SDValue SoftenFloatRes_FCEIL(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FCOS(SDNode *N); + SDValue SoftenFloatRes_FDIV(SDNode *N); + SDValue SoftenFloatRes_FEXP(SDNode *N); + SDValue SoftenFloatRes_FEXP2(SDNode *N); + SDValue SoftenFloatRes_FFLOOR(SDNode *N); + SDValue SoftenFloatRes_FLOG(SDNode *N); + SDValue SoftenFloatRes_FLOG2(SDNode *N); + SDValue SoftenFloatRes_FLOG10(SDNode *N); + SDValue SoftenFloatRes_FMA(SDNode *N); + SDValue SoftenFloatRes_FMUL(SDNode *N); + SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); + SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); + SDValue SoftenFloatRes_FP_ROUND(SDNode *N); + SDValue SoftenFloatRes_FPOW(SDNode *N); + SDValue SoftenFloatRes_FPOWI(SDNode *N); + SDValue SoftenFloatRes_FREM(SDNode *N); + SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); + SDValue SoftenFloatRes_FSIN(SDNode *N); + SDValue SoftenFloatRes_FSQRT(SDNode *N); + SDValue SoftenFloatRes_FSUB(SDNode *N); + SDValue SoftenFloatRes_FTRUNC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_UNDEF(SDNode *N); + SDValue SoftenFloatRes_VAARG(SDNode *N); + SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); + + // Return true if we can skip softening the given operand or SDNode because + // it was soften before by SoftenFloatResult and references to the 
operand + // were replaced by ReplaceValueWith. + bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations. + bool SoftenFloatOperand(SDNode *N, unsigned OpNo); + SDValue SoftenFloatOp_BITCAST(SDNode *N); + SDValue SoftenFloatOp_BR_CC(SDNode *N); + SDValue SoftenFloatOp_FP_EXTEND(SDNode *N); + SDValue SoftenFloatOp_FP_ROUND(SDNode *N); + SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N); + SDValue SoftenFloatOp_SELECT_CC(SDNode *N); + SDValue SoftenFloatOp_SETCC(SDNode *N); + SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Float Expansion Support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetExpandedFloat - Given a processed operand Op which was expanded into + /// two floating point values of half the size, this returns the two halves. + /// The low bits of Op are exactly equal to the bits of Lo; the high bits + /// exactly equal Hi. For example, if Op is a ppcf128 which was expanded + /// into two f64's, then this method returns the two f64's, with Lo being + /// equal to the lower 64 bits of Op, and Hi to the upper 64 bits. + void GetExpandedFloat(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetExpandedFloat(SDValue Op, SDValue Lo, SDValue Hi); + + // Float Result Expansion. + void ExpandFloatResult(SDNode *N, unsigned ResNo); + void ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMINNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMAXNUM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FADD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCEIL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOPYSIGN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FCOS (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FDIV (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FEXP2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FFLOOR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FTRUNC (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue 
&Lo, SDValue &Hi); + + // Float Operand Expansion. + bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); + SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); + SDValue ExpandFloatOp_FP_ROUND(SDNode *N); + SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_SELECT_CC(SDNode *N); + SDValue ExpandFloatOp_SETCC(SDNode *N); + SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); + + void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, SDLoc dl); + + + //===--------------------------------------------------------------------===// + // Float promotion support: LegalizeFloatTypes.cpp + //===--------------------------------------------------------------------===// + + SDValue GetPromotedFloat(SDValue Op) { + SDValue &PromotedOp = PromotedFloats[Op]; + RemapValue(PromotedOp); + assert(PromotedOp.getNode() && "Operand wasn't promoted?"); + return PromotedOp; + } + void SetPromotedFloat(SDValue Op, SDValue Result); + + void PromoteFloatResult(SDNode *N, unsigned ResNo); + SDValue PromoteFloatRes_BITCAST(SDNode *N); + SDValue PromoteFloatRes_BinOp(SDNode *N); + SDValue PromoteFloatRes_ConstantFP(SDNode *N); + SDValue PromoteFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); + SDValue PromoteFloatRes_FMAD(SDNode *N); + SDValue PromoteFloatRes_FPOWI(SDNode *N); + SDValue PromoteFloatRes_FP_ROUND(SDNode *N); + SDValue PromoteFloatRes_LOAD(SDNode *N); + SDValue PromoteFloatRes_SELECT(SDNode *N); + SDValue PromoteFloatRes_SELECT_CC(SDNode *N); + SDValue PromoteFloatRes_UnaryOp(SDNode *N); + SDValue PromoteFloatRes_UNDEF(SDNode *N); + SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N); + + bool PromoteFloatOperand(SDNode *N, unsigned ResNo); + SDValue PromoteFloatOp_BITCAST(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FCOPYSIGN(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_EXTEND(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_FP_TO_XINT(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo); + SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Scalarization Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetScalarizedVector - Given a processed one-element vector Op which was + /// scalarized to its element type, this returns the element. For example, + /// if Op is a v1i32, Op = < i32 val >, this method returns val, an i32. + SDValue GetScalarizedVector(SDValue Op) { + SDValue &ScalarizedOp = ScalarizedVectors[Op]; + RemapValue(ScalarizedOp); + assert(ScalarizedOp.getNode() && "Operand wasn't scalarized?"); + return ScalarizedOp; + } + void SetScalarizedVector(SDValue Op, SDValue Result); + + // Vector Result Scalarization: <1 x ty> -> ty. 
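+  // These routines replace an operation on a one-element vector with the
+  // corresponding operation on its scalar element; ScalarizeVectorOperand
+  // (further down) handles nodes where only an operand, not a result, is a
+  // one-element vector.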
+ void ScalarizeVectorResult(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo); + SDValue ScalarizeVecRes_BinOp(SDNode *N); + SDValue ScalarizeVecRes_TernaryOp(SDNode *N); + SDValue ScalarizeVecRes_UnaryOp(SDNode *N); + SDValue ScalarizeVecRes_InregOp(SDNode *N); + + SDValue ScalarizeVecRes_BITCAST(SDNode *N); + SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); + SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); + SDValue ScalarizeVecRes_FPOWI(SDNode *N); + SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); + SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N); + SDValue ScalarizeVecRes_VSELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT(SDNode *N); + SDValue ScalarizeVecRes_SELECT_CC(SDNode *N); + SDValue ScalarizeVecRes_SETCC(SDNode *N); + SDValue ScalarizeVecRes_UNDEF(SDNode *N); + SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N); + SDValue ScalarizeVecRes_VSETCC(SDNode *N); + + // Vector Operand Scalarization: <1 x ty> -> ty. + bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); + SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); + SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue ScalarizeVecOp_VSELECT(SDNode *N); + SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo); + + //===--------------------------------------------------------------------===// + // Vector Splitting Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetSplitVector - Given a processed vector Op which was split into vectors + /// of half the size, this method returns the halves. The first elements of + /// Op coincide with the elements of Lo; the remaining elements of Op coincide + /// with the elements of Hi: Op is what you would get by concatenating Lo and + /// Hi. For example, if Op is a v8i32 that was split into two v4i32's, then + /// this method returns the two v4i32's, with Lo corresponding to the first 4 + /// elements of Op, and Hi to the last 4 elements. + void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi); + void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi); + + // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>. 
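+  // Each SplitVecRes_* routine below builds the two half-width results for one
+  // opcode and returns them in Lo and Hi; SplitVectorOperand covers nodes
+  // whose results are legal but which have an operand that must be split.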
+ void SplitVectorResult(SDNode *N, unsigned OpNo); + void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); + + void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MLOAD(MaskedLoadSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_MGATHER(MaskedGatherSDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, + SDValue &Hi); + + // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. + bool SplitVectorOperand(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_UnaryOp(SDNode *N); + SDValue SplitVecOp_TruncateHelper(SDNode *N); + + SDValue SplitVecOp_BITCAST(SDNode *N); + SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo); + SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *N, unsigned OpNo); + SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N); + SDValue SplitVecOp_VSETCC(SDNode *N); + SDValue SplitVecOp_FP_ROUND(SDNode *N); + SDValue SplitVecOp_FCOPYSIGN(SDNode *N); + + //===--------------------------------------------------------------------===// + // Vector Widening Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GetWidenedVector - Given a processed vector Op which was widened into a + /// larger vector, this method returns the larger vector. The elements of + /// the returned vector consist of the elements of Op followed by elements + /// containing rubbish. For example, if Op is a v2i32 that was widened to a + /// v4i32, then this method returns a v4i32 for which the first two elements + /// are the same as those of Op, while the last two elements contain rubbish. + SDValue GetWidenedVector(SDValue Op) { + SDValue &WidenedOp = WidenedVectors[Op]; + RemapValue(WidenedOp); + assert(WidenedOp.getNode() && "Operand wasn't widened?"); + return WidenedOp; + } + void SetWidenedVector(SDValue Op, SDValue Result); + + // Widen Vector Result Promotion. 
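+  // Widening pads a vector out to a larger legal vector type; the extra
+  // elements are undefined unless ModifyToType is explicitly asked to fill
+  // them with zeroes.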
+ void WidenVectorResult(SDNode *N, unsigned ResNo); + SDValue WidenVecRes_MERGE_VALUES(SDNode* N, unsigned ResNo); + SDValue WidenVecRes_BITCAST(SDNode* N); + SDValue WidenVecRes_BUILD_VECTOR(SDNode* N); + SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N); + SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N); + SDValue WidenVecRes_EXTRACT_SUBVECTOR(SDNode* N); + SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); + SDValue WidenVecRes_LOAD(SDNode* N); + SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); + SDValue WidenVecRes_MGATHER(MaskedGatherSDNode* N); + SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N); + SDValue WidenVecRes_SELECT(SDNode* N); + SDValue WidenVecRes_SELECT_CC(SDNode* N); + SDValue WidenVecRes_SETCC(SDNode* N); + SDValue WidenVecRes_UNDEF(SDNode *N); + SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); + SDValue WidenVecRes_VSETCC(SDNode* N); + + SDValue WidenVecRes_Ternary(SDNode *N); + SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); + SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_FCOPYSIGN(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); + SDValue WidenVecRes_Shift(SDNode *N); + SDValue WidenVecRes_Unary(SDNode *N); + SDValue WidenVecRes_InregOp(SDNode *N); + + // Widen Vector Operand. + bool WidenVectorOperand(SDNode *N, unsigned OpNo); + SDValue WidenVecOp_BITCAST(SDNode *N); + SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); + SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); + SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); + SDValue WidenVecOp_STORE(SDNode* N); + SDValue WidenVecOp_MSTORE(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo); + SDValue WidenVecOp_SETCC(SDNode* N); + + SDValue WidenVecOp_Convert(SDNode *N); + SDValue WidenVecOp_FCOPYSIGN(SDNode *N); + + //===--------------------------------------------------------------------===// + // Vector Widening Utilities Support: LegalizeVectorTypes.cpp + //===--------------------------------------------------------------------===// + + /// GenWidenVectorLoads - Helper function to generate a set of + /// loads to load a vector with a resulting wider type. It takes + /// LdChain: list of chains for the load to be generated. + /// Ld: load to widen + SDValue GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD); + + /// GenWidenVectorExtLoads - Helper function to generate a set of extension + /// loads to load a vector with a resulting wider type. It takes + /// LdChain: list of chains for the load to be generated. + /// Ld: load to widen + /// ExtType: extension element type + SDValue GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType); + + /// GenWidenVectorStores - Helper function to generate a set of + /// stores to store a widened vector into non-widened memory + /// StChain: list of chains for the stores we have generated + /// ST: store of a widened value + void GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST); + + /// GenWidenVectorTruncStores - Helper function to generate a set of + /// truncating stores to store a widened vector into non-widened memory + /// StChain: list of chains for the stores we have generated + /// ST: store of a widened value + void GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, + StoreSDNode *ST); + + /// Modifies a vector input (widens or narrows) to a vector of NVT. The + /// input vector must have the same element type as NVT.
+ /// When FillWithZeroes is "on" the vector will be widened with + /// zeroes. + /// By default, the vector will be widened with undefined values. + SDValue ModifyToType(SDValue InOp, EVT NVT, bool FillWithZeroes = false); + + //===--------------------------------------------------------------------===// + // Generic Splitting: LegalizeTypesGeneric.cpp + //===--------------------------------------------------------------------===// + + // Legalization methods which only use that the illegal type is split into two + // not necessarily identical types. As such they can be used for splitting + // vectors and expanding integers and floats. + + void GetSplitOp(SDValue Op, SDValue &Lo, SDValue &Hi) { + if (Op.getValueType().isVector()) + GetSplitVector(Op, Lo, Hi); + else if (Op.getValueType().isInteger()) + GetExpandedInteger(Op, Lo, Hi); + else + GetExpandedFloat(Op, Lo, Hi); + } + + /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and + /// high parts of the given value. + void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); + + // Generic Result Splitting. + void SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void SplitRes_SELECT (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi); + + //===--------------------------------------------------------------------===// + // Generic Expansion: LegalizeTypesGeneric.cpp + //===--------------------------------------------------------------------===// + + // Legalization methods which only use that the illegal type is split into two + // identical types of half the size, and that the Lo/Hi part is stored first + // in memory on little/big-endian machines, followed by the Hi/Lo part. As + // such they can be used for expanding integers and floats. + + void GetExpandedOp(SDValue Op, SDValue &Lo, SDValue &Hi) { + if (Op.getValueType().isInteger()) + GetExpandedInteger(Op, Lo, Hi); + else + GetExpandedFloat(Op, Lo, Hi); + } + + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, EVT EltVT); + + // Generic Result Expansion. + void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi); + void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_NormalLoad (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi); + + // Generic Operand Expansion. + SDValue ExpandOp_BITCAST (SDNode *N); + SDValue ExpandOp_BUILD_VECTOR (SDNode *N); + SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N); + SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N); + SDValue ExpandOp_SCALAR_TO_VECTOR (SDNode *N); + SDValue ExpandOp_NormalStore (SDNode *N, unsigned OpNo); +}; + +} // end namespace llvm. 
+ +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp new file mode 100644 index 0000000..593c346 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -0,0 +1,563 @@ +//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements generic type expansion and splitting for LegalizeTypes. +// The routines here perform legalization when the details of the type (such as +// whether it is an integer or a float) do not matter. +// Expansion is the act of changing a computation in an illegal type to be a +// computation in two identical registers of a smaller type. The Lo/Hi part +// is required to be stored first in memory on little/big-endian machines. +// Splitting is the act of changing a computation in an illegal type to be a +// computation in two not necessarily identical registers of a smaller type. +// There are no requirements on how the type is represented in memory. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Generic Result Expansion. +//===----------------------------------------------------------------------===// + +// These routines assume that the Lo/Hi part is stored first in memory on +// little/big-endian machines, followed by the Hi/Lo part. This means that +// they cannot be used as is on vectors, for which Lo is always stored first. +void DAGTypeLegalizer::ExpandRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetExpandedOp(Op, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OutVT = N->getValueType(0); + EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + SDLoc dl(N); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + break; + case TargetLowering::TypePromoteFloat: + llvm_unreachable("Bitcast of a promotion-needing float should never need" + "expansion"); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. + auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + } + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: { + auto &DL = DAG.getDataLayout(); + // Convert the expanded pieces of the input. 
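+ // The input has itself been expanded into Lo/Hi parts; reuse those parts + // and, if the source and destination types disagree on which part is stored + // first in memory, swap them before bitcasting each part to the new type.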
+ GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT, DL) != + TLI.hasBigEndianPartOrdering(OutVT, DL)) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + } + case TargetLowering::TypeSplitVector: + GetSplitVector(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeScalarizeVector: + // Convert the element instead. + SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + case TargetLowering::TypeWidenVector: { + assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); + InOp = GetWidenedVector(InOp); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); + return; + } + } + + if (InVT.isVector() && OutVT.isInteger()) { + // Handle cases like i64 = BITCAST v1i64 on x86, where the operand + // is legal but the result is not. + unsigned NumElems = 2; + EVT ElemVT = NOutVT; + EVT NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + + // If <ElemVT * N> is not a legal type, try <ElemVT/2 * (N*2)>. + while (!isTypeLegal(NVT)) { + unsigned NewSizeInBits = ElemVT.getSizeInBits() / 2; + // If the element size is smaller than byte, bail. + if (NewSizeInBits < 8) + break; + NumElems *= 2; + ElemVT = EVT::getIntegerVT(*DAG.getContext(), NewSizeInBits); + NVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, NumElems); + } + + if (isTypeLegal(NVT)) { + SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp); + + SmallVector<SDValue, 8> Vals; + for (unsigned i = 0; i < NumElems; ++i) + Vals.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, CastInOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + + // Build Lo, Hi pair by pairing extracted elements if needed. + unsigned Slot = 0; + for (unsigned e = Vals.size(); e - Slot > 2; Slot += 2, e += 1) { + // Each iteration will BUILD_PAIR two nodes and append the result until + // there are only two nodes left, i.e. Lo and Hi. + SDValue LHS = Vals[Slot]; + SDValue RHS = Vals[Slot + 1]; + + if (DAG.getDataLayout().isBigEndian()) + std::swap(LHS, RHS); + + Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, + EVT::getIntegerVT( + *DAG.getContext(), + LHS.getValueType().getSizeInBits() << 1), + LHS, RHS)); + } + Lo = Vals[Slot++]; + Hi = Vals[Slot++]; + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + return; + } + } + + // Lower the bit-convert to a store/load from the stack. + assert(NOutVT.isByteSized() && "Expanded type not byte sized!"); + + // Create the stack frame object. Make sure it is aligned for both + // the source and expanded destination types. + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment( + NOutVT.getTypeForEVT(*DAG.getContext())); + SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment); + int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); + MachinePointerInfo PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); + + // Emit a store to the stack slot. 
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo, + false, false, 0); + + // Load the first half from the stack slot. + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); + + // Increment the pointer to the other half. + unsigned IncrementSize = NOutVT.getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); + + // Load the second half from the stack slot. + Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, + PtrInfo.getWithOffset(IncrementSize), false, + false, false, MinAlign(Alignment, IncrementSize)); + + // Handle endianness of the load. + if (TLI.hasBigEndianPartOrdering(OutVT, DAG.getDataLayout())) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_BUILD_PAIR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Return the operands. + Lo = N->getOperand(0); + Hi = N->getOperand(1); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_ELEMENT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + GetExpandedOp(N->getOperand(0), Lo, Hi); + SDValue Part = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? + Hi : Lo; + + assert(Part.getValueType() == N->getValueType(0) && + "Type twice as big as expanded type not itself expanded!"); + + GetPairElements(Part, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue OldVec = N->getOperand(0); + unsigned OldElts = OldVec.getValueType().getVectorNumElements(); + EVT OldEltVT = OldVec.getValueType().getVectorElementType(); + SDLoc dl(N); + + // Convert to a vector of the expanded element type, for example + // <3 x i64> -> <6 x i32>. + EVT OldVT = N->getValueType(0); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + + if (OldVT != OldEltVT) { + // The result of EXTRACT_VECTOR_ELT may be larger than the element type of + // the input vector. If so, extend the elements of the input vector to the + // same bitwidth as the result before expanding. + assert(OldEltVT.bitsLT(OldVT) && "Result type smaller than element type!"); + EVT NVecVT = EVT::getVectorVT(*DAG.getContext(), OldVT, OldElts); + OldVec = DAG.getNode(ISD::ANY_EXTEND, dl, NVecVT, N->getOperand(0)); + } + + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, + EVT::getVectorVT(*DAG.getContext(), + NewVT, 2*OldElts), + OldVec); + + // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector.
+ SDValue Idx = N->getOperand(1); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, + DAG.getConstant(1, dl, Idx.getValueType())); + Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); +} + +void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); + SDLoc dl(N); + + LoadSDNode *LD = cast<LoadSDNode>(N); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); + SDValue Chain = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + unsigned Alignment = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + + Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), + isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + // Increment the pointer to the other half. + unsigned IncrementSize = NVT.getSizeInBits() / 8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getLoad(NVT, dl, Chain, Ptr, + LD->getPointerInfo().getWithOffset(IncrementSize), + isVolatile, isNonTemporal, isInvariant, + MinAlign(Alignment, IncrementSize), AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Handle endianness of the load. + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Chain); +} + +void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); + SDValue Chain = N->getOperand(0); + SDValue Ptr = N->getOperand(1); + SDLoc dl(N); + const unsigned Align = N->getConstantOperandVal(3); + + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); + + // Handle endianness of the load. + if (TLI.hasBigEndianPartOrdering(OVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + // Modified the chain - switch anything that used the old chain to use + // the new one. + ReplaceValueWith(SDValue(N, 1), Hi.getValue(1)); +} + + +//===--------------------------------------------------------------------===// +// Generic Operand Expansion. 
+//===--------------------------------------------------------------------===// + +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl<SDValue> &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + +SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { + SDLoc dl(N); + if (N->getValueType(0).isVector()) { + // An illegal expanding type is being converted to a legal vector type. + // Make a two element vector out of the expanded parts and convert that + // instead, but only if the new vector type is legal (otherwise there + // is no point, and it might create expansion loops). For example, on + // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. + unsigned NumElts = 2; + EVT OVT = N->getOperand(0).getValueType(); + EVT NVT = EVT::getVectorVT(*DAG.getContext(), + TLI.getTypeToTransformTo(*DAG.getContext(), OVT), + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. + NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } + + SmallVector<SDValue, 8> Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); + + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, + makeArrayRef(Ops.data(), NumElts)); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); + } + + // Otherwise, store to a temporary and load out again as the new type. + return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0)); +} + +SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { + // The vector type is legal but the element type needs expansion. + EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + EVT OldVT = N->getOperand(0).getValueType(); + EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); + SDLoc dl(N); + + assert(OldVT == VecVT.getVectorElementType() && + "BUILD_VECTOR operand type doesn't match vector element type!"); + + // Build a vector of twice the length out of the expanded elements. + // For example <3 x i64> -> <6 x i32>. + std::vector<SDValue> NewElts; + NewElts.reserve(NumElts*2); + + for (unsigned i = 0; i < NumElts; ++i) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(i), Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + NewElts.push_back(Lo); + NewElts.push_back(Hi); + } + + SDValue NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*DAG.getContext(), + NewVT, NewElts.size()), + NewElts); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) { + SDValue Lo, Hi; + GetExpandedOp(N->getOperand(0), Lo, Hi); + return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() ? Hi : Lo; +} + +SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { + // The vector type is legal but the element type needs expansion. 
+ EVT VecVT = N->getValueType(0); + unsigned NumElts = VecVT.getVectorNumElements(); + SDLoc dl(N); + + SDValue Val = N->getOperand(1); + EVT OldEVT = Val.getValueType(); + EVT NewEVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldEVT); + + assert(OldEVT == VecVT.getVectorElementType() && + "Inserted element type doesn't match vector element type!"); + + // Bitconvert to a vector of twice the length with elements of the expanded + // type, insert the expanded vector elements, and then convert back. + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2); + SDValue NewVec = DAG.getNode(ISD::BITCAST, dl, + NewVecVT, N->getOperand(0)); + + SDValue Lo, Hi; + GetExpandedOp(Val, Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + SDValue Idx = N->getOperand(2); + Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); + Idx = DAG.getNode(ISD::ADD, dl, + Idx.getValueType(), Idx, + DAG.getConstant(1, dl, Idx.getValueType())); + NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); + + // Convert the new vector to the old vector type. + return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec); +} + +SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { + SDLoc dl(N); + EVT VT = N->getValueType(0); + assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && + "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + Ops[0] = N->getOperand(0); + SDValue UndefVal = DAG.getUNDEF(Ops[0].getValueType()); + for (unsigned i = 1; i < NumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { + assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); + assert(OpNo == 1 && "Can only expand the stored value so far"); + SDLoc dl(N); + + StoreSDNode *St = cast<StoreSDNode>(N); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); + SDValue Chain = St->getChain(); + SDValue Ptr = St->getBasePtr(); + unsigned Alignment = St->getAlignment(); + bool isVolatile = St->isVolatile(); + bool isNonTemporal = St->isNonTemporal(); + AAMDNodes AAInfo = St->getAAInfo(); + + assert(NVT.isByteSized() && "Expanded type not byte sized!"); + unsigned IncrementSize = NVT.getSizeInBits() / 8; + + SDValue Lo, Hi; + GetExpandedOp(St->getValue(), Lo, Hi); + + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + + Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), + isVolatile, isNonTemporal, Alignment, AAInfo); + + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getStore(Chain, dl, Hi, Ptr, + St->getPointerInfo().getWithOffset(IncrementSize), + isVolatile, isNonTemporal, + MinAlign(Alignment, IncrementSize), AAInfo); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); +} + + +//===--------------------------------------------------------------------===// +// Generic Result Splitting. 
+//===--------------------------------------------------------------------===// + +// Be careful to make no assumptions about which of Lo/Hi is stored first in +// memory (for vectors it is always Lo first followed by Hi in the following +// bytes; for integers and floats it is Lo first if and only if the machine is +// little-endian). + +void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + GetSplitOp(Op, Lo, Hi); +} + +void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH, CL, CH; + SDLoc dl(N); + GetSplitOp(N->getOperand(1), LL, LH); + GetSplitOp(N->getOperand(2), RL, RH); + + SDValue Cond = N->getOperand(0); + CL = CH = Cond; + if (Cond.getValueType().isVector()) { + // Check if there are already splitted versions of the vector available and + // use those instead of splitting the mask operand again. + if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + std::tie(CL, CH) = DAG.SplitVector(Cond, dl); + } + + Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); + Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), CH, LH, RH); +} + +void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LL, LH, RL, RH; + SDLoc dl(N); + GetSplitOp(N->getOperand(2), LL, LH); + GetSplitOp(N->getOperand(3), RL, RH); + + Lo = DAG.getNode(ISD::SELECT_CC, dl, LL.getValueType(), N->getOperand(0), + N->getOperand(1), LL, RL, N->getOperand(4)); + Hi = DAG.getNode(ISD::SELECT_CC, dl, LH.getValueType(), N->getOperand(0), + N->getOperand(1), LH, RH, N->getOperand(4)); +} + +void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getUNDEF(LoVT); + Hi = DAG.getUNDEF(HiVT); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp new file mode 100644 index 0000000..f61f631 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -0,0 +1,1070 @@ +//===-- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SelectionDAG::LegalizeVectors method. +// +// The vector legalizer looks for vector operations which might need to be +// scalarized and legalizes them. This is a separate step from Legalize because +// scalarizing can introduce illegal types. For example, suppose we have an +// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition +// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the +// operation, which introduces nodes with the illegal type i64 which must be +// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; +// the operation must be unrolled, which introduces nodes with the illegal +// type i8 which must be promoted. 
+// +// This does not legalize vector manipulations like ISD::BUILD_VECTOR, +// or operations that happen to take a vector which are custom-lowered; +// the legalization for such operations never produces nodes +// with illegal types, so it's okay to put off legalizing them until +// SelectionDAG::Legalize runs. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/Target/TargetLowering.h" +using namespace llvm; + +namespace { +class VectorLegalizer { + SelectionDAG& DAG; + const TargetLowering &TLI; + bool Changed; // Keep track of whether anything changed + + /// For nodes that are of legal width, and that have more than one use, this + /// map indicates what regularized operand to use. This allows us to avoid + /// legalizing the same thing more than once. + SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; + + /// \brief Adds a node to the translation cache. + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + } + + /// \brief Legalizes the given node. + SDValue LegalizeOp(SDValue Op); + + /// \brief Assuming the node is legal, "legalize" the results. + SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); + + /// \brief Implements unrolling a VSETCC. + SDValue UnrollVSETCC(SDValue Op); + + /// \brief Implement expand-based legalization of vector operations. + /// + /// This is just a high-level routine to dispatch to specific code paths for + /// operations to legalize them. + SDValue Expand(SDValue Op); + + /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// FSUB isn't legal. + /// + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + /// SINT_TO_FLOAT and SHR on vectors isn't legal. + SDValue ExpandUINT_TO_FLOAT(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + SDValue ExpandSEXTINREG(SDValue Op); + + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Expand bswap of vectors into a shuffle if legal. + SDValue ExpandBSWAP(SDValue Op); + + /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// supported by the target. + SDValue ExpandVSELECT(SDValue Op); + SDValue ExpandSELECT(SDValue Op); + SDValue ExpandLoad(SDValue Op); + SDValue ExpandStore(SDValue Op); + SDValue ExpandFNEG(SDValue Op); + SDValue ExpandBITREVERSE(SDValue Op); + SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op); + + /// \brief Implements vector promotion. 
+ /// + /// This is essentially just bitcasting the operands to a different type and + /// bitcasting the result back to the original type. + SDValue Promote(SDValue Op); + + /// \brief Implements [SU]INT_TO_FP vector promotion. + /// + /// This is a [zs]ext of the input operand to the next size up. + SDValue PromoteINT_TO_FP(SDValue Op); + + /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// + /// It is promoted to the next size up integer type. The result is then + /// truncated back to the original type. + SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + +public: + /// \brief Begin legalizer the vector operations in the DAG. + bool Run(); + VectorLegalizer(SelectionDAG& dag) : + DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} +}; + +bool VectorLegalizer::Run() { + // Before we start legalizing vector nodes, check if there are any vectors. + bool HasVectors = false; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { + // Check if the values of the nodes contain vectors. We don't need to check + // the operands because we are going to check their values at some point. + for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); + J != E; ++J) + HasVectors |= J->isVector(); + + // If we found a vector node we can start the legalization. + if (HasVectors) + break; + } + + // If this basic block has no vectors then no need to legalize vectors. + if (!HasVectors) + return false; + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. + DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) + LegalizeOp(SDValue(&*I, 0)); + + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); + + return Changed; +} + +SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDValue Result) { + // Generic legalization: just pass the operand through. + for (unsigned i = 0, e = Op.getNode()->getNumValues(); i != e; ++i) + AddLegalizedOperand(Op.getValue(i), Result.getValue(i)); + return Result.getValue(Op.getResNo()); +} + +SDValue VectorLegalizer::LegalizeOp(SDValue Op) { + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. 
+ DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + + SDNode* Node = Op.getNode(); + + // Legalize the operands + SmallVector<SDValue, 8> Ops; + for (const SDValue &Op : Node->op_values()) + Ops.push_back(LegalizeOp(Op)); + + SDValue Result = SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops), 0); + + bool HasVectorValue = false; + if (Op.getOpcode() == ISD::LOAD) { + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + ISD::LoadExtType ExtType = LD->getExtensionType(); + if (LD->getMemoryVT().isVector() && ExtType != ISD::NON_EXTLOAD) + switch (TLI.getLoadExtAction(LD->getExtensionType(), LD->getValueType(0), + LD->getMemoryVT())) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: + if (SDValue Lowered = TLI.LowerOperation(Result, DAG)) { + if (Lowered == Result) + return TranslateLegalizeResults(Op, Lowered); + Changed = true; + if (Lowered->getNumValues() != Op->getNumValues()) { + // This expanded to something other than the load. Assume the + // lowering code took care of any chain values, and just handle the + // returned value. + assert(Result.getValue(1).use_empty() && + "There are still live users of the old chain!"); + return LegalizeOp(Lowered); + } + return TranslateLegalizeResults(Op, Lowered); + } + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandLoad(Op)); + } + } else if (Op.getOpcode() == ISD::STORE) { + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + EVT StVT = ST->getMemoryVT(); + MVT ValVT = ST->getValue().getSimpleValueType(); + if (StVT.isVector() && ST->isTruncatingStore()) + switch (TLI.getTruncStoreAction(ValVT, StVT)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Legal: + return TranslateLegalizeResults(Op, Result); + case TargetLowering::Custom: { + SDValue Lowered = TLI.LowerOperation(Result, DAG); + Changed = Lowered != Result; + return TranslateLegalizeResults(Op, Lowered); + } + case TargetLowering::Expand: + Changed = true; + return LegalizeOp(ExpandStore(Op)); + } + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) + HasVectorValue = true; + + for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); + J != E; + ++J) + HasVectorValue |= J->isVector(); + if (!HasVectorValue) + return TranslateLegalizeResults(Op, Result); + + EVT QueryType; + switch (Op.getOpcode()) { + default: + return TranslateLegalizeResults(Op, Result); + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::SDIVREM: + case ISD::UDIVREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::BSWAP: + case ISD::BITREVERSE: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::SELECT: + case ISD::VSELECT: + case ISD::SELECT_CC: + case ISD::SETCC: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::TRUNCATE: + case ISD::SIGN_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FNEG: + case ISD::FABS: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::FCOPYSIGN: + case ISD::FSQRT: + case 
ISD::FSIN: + case ISD::FCOS: + case ISD::FPOWI: + case ISD::FPOW: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FRINT: + case ISD::FNEARBYINT: + case ISD::FROUND: + case ISD::FFLOOR: + case ISD::FP_ROUND: + case ISD::FP_EXTEND: + case ISD::FMA: + case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + QueryType = Node->getValueType(0); + break; + case ISD::FP_ROUND_INREG: + QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT(); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + QueryType = Node->getOperand(0).getValueType(); + break; + case ISD::MSCATTER: + QueryType = cast<MaskedScatterSDNode>(Node)->getValue().getValueType(); + break; + case ISD::MSTORE: + QueryType = cast<MaskedStoreSDNode>(Node)->getValue().getValueType(); + break; + } + + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { + default: llvm_unreachable("This action is not supported yet!"); + case TargetLowering::Promote: + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: + break; + case TargetLowering::Custom: { + SDValue Tmp1 = TLI.LowerOperation(Op, DAG); + if (Tmp1.getNode()) { + Result = Tmp1; + break; + } + // FALL THROUGH + } + case TargetLowering::Expand: + Result = Expand(Op); + } + + // Make sure that the generated code is itself legal. + if (Result != Op) { + Result = LegalizeOp(Result); + Changed = true; + } + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; +} + +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + } + + // There are currently two cases of vector promotion: + // 1) Bitcasting a vector of integers to a different type to a vector of the + // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. + // 2) Extending a vector of floats to a vector of the same number of larger + // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. 
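+ // The code below handles both cases: floating-point operands are widened + // with FP_EXTEND and the result is narrowed again with FP_ROUND, while + // integer operands are simply bitcast to the promoted type and back.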
+ MVT VT = Op.getSimpleValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + SDLoc dl(Op); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + if (Op.getOperand(j) + .getValueType() + .getVectorElementType() + .isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) + Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Op.getOperand(j)); + else + Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + Op = DAG.getNode(Op.getOpcode(), dl, NVT, Operands, Op.getNode()->getFlags()); + if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || + (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && + NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) + return DAG.getNode(ISD::FP_ROUND, dl, VT, Op, DAG.getIntPtrConstant(0, dl)); + else + return DAG.getNode(ISD::BITCAST, dl, VT, Op); +} + +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { + // INT_TO_FP operations may require the input operand be promoted even + // when the type is otherwise legal. + EVT VT = Op.getOperand(0).getValueType(); + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + + // Normal getTypeToPromoteTo() doesn't work here, as that will promote + // by widening the vector w/ the same element width and twice the number + // of elements. We want the other way around, the same number of elements, + // each twice the width. + // + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + + EVT NVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NVT.isSimple() && "Promoting to a non-simple vector type!"); + SDLoc dl(Op); + SmallVector<SDValue, 4> Operands(Op.getNumOperands()); + + unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : + ISD::SIGN_EXTEND; + for (unsigned j = 0; j != Op.getNumOperands(); ++j) { + if (Op.getOperand(j).getValueType().isVector()) + Operands[j] = DAG.getNode(Opc, dl, NVT, Op.getOperand(j)); + else + Operands[j] = Op.getOperand(j); + } + + return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Operands); +} + +// For FP_TO_INT we promote the result type to a vector type with wider +// elements and then truncate the result. This is different from the default +// PromoteVector which uses bitcast to promote thus assuming that the +// promoted vector type has the same overall size.
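+ // For example, on a hypothetical target where FP_TO_SINT producing v4i16 is + // marked Promote but producing v4i32 is legal, a v4f32 -> v4i16 conversion + // is emitted as an FP_TO_SINT yielding v4i32 followed by a TRUNCATE back to + // v4i16.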
+SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { + assert(Op.getNode()->getNumValues() == 1 && + "Can't promote a vector with multiple results!"); + EVT VT = Op.getValueType(); + + EVT NewVT; + unsigned NewOpc; + while (1) { + NewVT = VT.widenIntegerVectorElementType(*DAG.getContext()); + assert(NewVT.isSimple() && "Promoting to a non-simple vector type!"); + if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewVT)) { + NewOpc = ISD::FP_TO_SINT; + break; + } + if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewVT)) { + NewOpc = ISD::FP_TO_UINT; + break; + } + } + + SDLoc loc(Op); + SDValue promoted = DAG.getNode(NewOpc, SDLoc(Op), NewVT, Op.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(Op), VT, promoted); +} + + +SDValue VectorLegalizer::ExpandLoad(SDValue Op) { + SDLoc dl(Op); + LoadSDNode *LD = cast<LoadSDNode>(Op.getNode()); + SDValue Chain = LD->getChain(); + SDValue BasePTR = LD->getBasePtr(); + EVT SrcVT = LD->getMemoryVT(); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SmallVector<SDValue, 8> Vals; + SmallVector<SDValue, 8> LoadChains; + unsigned NumElem = SrcVT.getVectorNumElements(); + + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + + if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + // When elements in a vector is not byte-addressable, we cannot directly + // load each element by advancing pointer, which could only address bytes. + // Instead, we load all significant words, mask bits off, and concatenate + // them to form each element. Finally, they are extended to destination + // scalar type to build the destination vector. + EVT WideVT = TLI.getPointerTy(DAG.getDataLayout()); + + assert(WideVT.isRound() && + "Could not handle the sophisticated case when the widest integer is" + " not power of 2."); + assert(WideVT.bitsGE(SrcEltVT) && + "Type is not legalized?"); + + unsigned WideBytes = WideVT.getStoreSize(); + unsigned Offset = 0; + unsigned RemainingBytes = SrcVT.getStoreSize(); + SmallVector<SDValue, 8> LoadVals; + + while (RemainingBytes > 0) { + SDValue ScalarLoad; + unsigned LoadBytes = WideBytes; + + if (RemainingBytes >= LoadBytes) { + ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); + } else { + EVT LoadVT = WideVT; + while (RemainingBytes < LoadBytes) { + LoadBytes >>= 1; // Reduce the load size by half. + LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); + } + ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LoadVT, LD->isVolatile(), + LD->isNonTemporal(), LD->isInvariant(), + MinAlign(LD->getAlignment(), Offset), + LD->getAAInfo()); + } + + RemainingBytes -= LoadBytes; + Offset += LoadBytes; + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getConstant(LoadBytes, dl, + BasePTR.getValueType())); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + // Extract bits, pack and extend/trunc them into destination type. 
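+ // Each source element occupies SrcEltBits consecutive bits of the wide + // words loaded above: shift the containing word(s) into place, mask with + // SrcEltBitMask, OR together pieces that straddle a word boundary, and then + // extend or truncate to the destination scalar type based on ExtType.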
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT); + + unsigned BitOffset = 0; + unsigned WideIdx = 0; + unsigned WideBits = WideVT.getSizeInBits(); + + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { + SDValue Lo, Hi, ShAmt; + + if (BitOffset < WideBits) { + ShAmt = DAG.getConstant( + BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); + } + + BitOffset += SrcEltBits; + if (BitOffset >= WideBits) { + WideIdx++; + BitOffset -= WideBits; + if (BitOffset > 0) { + ShAmt = DAG.getConstant( + SrcEltBits - BitOffset, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); + } + } + + if (Hi.getNode()) + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + + switch (ExtType) { + default: llvm_unreachable("Unknown extended-load op!"); + case ISD::EXTLOAD: + Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::ZEXTLOAD: + Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::SEXTLOAD: + ShAmt = + DAG.getConstant(WideBits - SrcEltBits, dl, + TLI.getShiftAmountTy(WideVT, DAG.getDataLayout())); + Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); + Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); + Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); + break; + } + Vals.push_back(Lo); + } + } else { + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; Idx<NumElem; Idx++) { + SDValue ScalarLoad = DAG.getExtLoad(ExtType, dl, + Op.getNode()->getValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), + MinAlign(LD->getAlignment(), Idx * Stride), LD->getAAInfo()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getConstant(Stride, dl, BasePTR.getValueType())); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + } + + SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, + Op.getNode()->getValueType(0), Vals); + + AddLegalizedOperand(Op.getValue(0), Value); + AddLegalizedOperand(Op.getValue(1), NewChain); + + return (Op.getResNo() ? NewChain : Value); +} + +SDValue VectorLegalizer::ExpandStore(SDValue Op) { + SDLoc dl(Op); + StoreSDNode *ST = cast<StoreSDNode>(Op.getNode()); + SDValue Chain = ST->getChain(); + SDValue BasePTR = ST->getBasePtr(); + SDValue Value = ST->getValue(); + EVT StVT = ST->getMemoryVT(); + + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + + unsigned NumElem = StVT.getVectorNumElements(); + // The type of the data we want to save + EVT RegVT = Value.getValueType(); + EVT RegSclVT = RegVT.getScalarType(); + // The type of data as saved in memory. + EVT MemSclVT = StVT.getScalarType(); + + // Cast floats into integers + unsigned ScalarSize = MemSclVT.getSizeInBits(); + + // Round odd types to the next pow of two. 
+ if (!isPowerOf2_32(ScalarSize)) + ScalarSize = NextPowerOf2(ScalarSize); + + // Store Stride in bytes + unsigned Stride = ScalarSize/8; + // Extract each of the elements from the original vector + // and save them into memory individually. + SmallVector<SDValue, 8> Stores; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, RegSclVT, Value, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + // This scalar TruncStore may be illegal, but we legalize it later. + SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, + ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, + isVolatile, isNonTemporal, MinAlign(Alignment, Idx*Stride), + AAInfo); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getConstant(Stride, dl, BasePTR.getValueType())); + + Stores.push_back(Store); + } + SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores); + AddLegalizedOperand(Op, TF); + return TF; +} + +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + case ISD::BITREVERSE: + return ExpandBITREVERSE(Op); + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op); + default: + return DAG.UnrollVectorOp(Op.getNode()); + } +} + +SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { + // Lower a select instruction where the condition is a scalar and the + // operands are vectors. Lower this select to VSELECT and implement it + // using XOR AND OR. The selector bit is broadcasted. + EVT VT = Op.getValueType(); + SDLoc DL(Op); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + assert(VT.isVector() && !Mask.getValueType().isVector() + && Op1.getValueType() == Op2.getValueType() && "Invalid type"); + + unsigned NumElem = VT.getVectorNumElements(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // Also, we need to be able to construct a splat vector using BUILD_VECTOR. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::BUILD_VECTOR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + // Generate a mask operand. + EVT MaskTy = VT.changeVectorElementTypeToInteger(); + + // What is the size of each element in the vector mask. + EVT BitTy = MaskTy.getScalarType(); + + Mask = DAG.getSelect(DL, BitTy, Mask, + DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, + BitTy), + DAG.getConstant(0, DL, BitTy)); + + // Broadcast the mask so that the entire vector is all-one or all zero. 
+ SmallVector<SDValue, 8> Ops(NumElem, Mask); + Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskTy, Ops); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(BitTy.getSizeInBits()), DL, MaskTy); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, MaskTy, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + +SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { + EVT VT = Op.getValueType(); + + // Make sure that the SRA and SHL instructions are available. + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT(); + + unsigned BW = VT.getScalarType().getSizeInBits(); + unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); + + Op = Op.getOperand(0); + Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); +} + +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. 
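+ // For instance, zero extending the low four i16 lanes of a v8i16 to v4i32 + // on a little-endian target uses the mask <8,1,9,3,10,5,11,7> over + // (Zero, Src): shuffle lanes 0, 2, 4 and 6 take Src elements 0-3, the other + // lanes stay zero, and the bitcast to v4i32 yields the zero-extended values.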
+SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, DL, SrcScalarVT); + SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { + EVT VT = Op.getValueType(); + + // Generate a byte wise shuffle mask for the BSWAP. + SmallVector<int, 16> ShuffleMask; + int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; + for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) + for (int J = ScalarSizeInBytes - 1; J >= 0; --J) + ShuffleMask.push_back((I * ScalarSizeInBytes) + J); + + EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); + + // Only emit a shuffle if the mask is legal. + if (!TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) + return DAG.UnrollVectorOp(Op.getNode()); + + SDLoc DL(Op); + Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Op.getOperand(0)); + Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), + ShuffleMask.data()); + return DAG.getNode(ISD::BITCAST, DL, VT, Op); +} + +SDValue VectorLegalizer::ExpandBITREVERSE(SDValue Op) { + EVT VT = Op.getValueType(); + + // If we have the scalar operation, it's probably cheaper to unroll it. + if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) + return DAG.UnrollVectorOp(Op.getNode()); + + // If we have the appropriate vector bit operations, it is better to use them + // than unrolling and expanding each component. + if (!TLI.isOperationLegalOrCustom(ISD::SHL, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT) || + !TLI.isOperationLegalOrCustom(ISD::AND, VT) || + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + // Let LegalizeDAG handle this later. + return Op; +} + +SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { + // Implement VSELECT in terms of XOR, AND, OR + // on platforms which do not support blend natively. + SDLoc DL(Op); + + SDValue Mask = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + + EVT VT = Mask.getValueType(); + + // If we can't even use the basic vector operations of + // AND,OR,XOR, we will have to scalarize the op. + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + // This operation also isn't safe with AND, OR, XOR when the boolean + // type is 0/1 as we need an all ones vector constant to mask with. + // FIXME: Sign extend 1 to all ones if thats legal on the target. 
+ if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) + return DAG.UnrollVectorOp(Op.getNode()); + + // If the mask and the type are different sizes, unroll the vector op. This + // can occur when getSetCCResultType returns something that is different in + // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. + if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + return DAG.UnrollVectorOp(Op.getNode()); + + // Bitcast the operands to be the same type as the mask. + // This is needed when we select between FP types because + // the mask is a vector of integers. + Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); + Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); + + SDValue AllOnes = DAG.getConstant( + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT); + SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes); + + Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); + Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); + SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); + return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); +} + +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { + EVT VT = Op.getOperand(0).getValueType(); + SDLoc DL(Op); + + // Make sure that the SINT_TO_FP and SRL instructions are available. + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + EVT SVT = VT.getScalarType(); + assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + + unsigned BW = SVT.getSizeInBits(); + SDValue HalfWord = DAG.getConstant(BW/2, DL, VT); + + // Constants to clear the upper part of the word. + // Notice that we can also use SHL+SHR, but using a constant is slightly + // faster on x86. + uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT); + + // Two to the power of half-word-size. + SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType()); + + // Clear upper part of LO, lower HI + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + + // Convert hi and lo to floats + // Convert the hi part back to the upper values + // TODO: Can any fast-math-flags be set on these nodes? + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + + // Add the two halves + return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); +} + + +SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { + if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { + SDLoc DL(Op); + SDValue Zero = DAG.getConstantFP(-0.0, DL, Op.getValueType()); + // TODO: If FNEG had fast-math-flags, they'd get propagated to this FSUB. 
+ return DAG.getNode(ISD::FSUB, DL, Op.getValueType(), + Zero, Op.getOperand(0)); + } + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) { + // If the non-ZERO_UNDEF version is supported we can let LegalizeDAG handle. + unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ; + if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) + return Op; + + // Otherwise go ahead and unroll. + return DAG.UnrollVectorOp(Op.getNode()); +} + +SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { + EVT VT = Op.getValueType(); + unsigned NumElems = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); + EVT TmpEltVT = LHS.getValueType().getVectorElementType(); + SDLoc dl(Op); + SmallVector<SDValue, 8> Ops(NumElems); + for (unsigned i = 0; i < NumElems; ++i) { + SDValue LHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue RHSElem = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), TmpEltVT), + LHSElem, RHSElem, CC); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), dl, EltVT), + DAG.getConstant(0, dl, EltVT)); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +} + +bool SelectionDAG::LegalizeVectors() { + return VectorLegalizer(*this).Run(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp new file mode 100644 index 0000000..d0187d3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -0,0 +1,3755 @@ +//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file performs vector type splitting and scalarization for LegalizeTypes. +// Scalarization is the act of changing a computation in an illegal one-element +// vector type to be a computation in its scalar element type. For example, +// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed +// as a base case when scalarizing vector arithmetic like <4 x f32>, which +// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32 +// types. +// Splitting is the act of changing a computation in an invalid vector type to +// be a computation in two vectors of half the size. For example, implementing +// <128 x f32> operations in terms of two <64 x f32> operations. +// +//===----------------------------------------------------------------------===// + +#include "LegalizeTypes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "legalize-types" + +//===----------------------------------------------------------------------===// +// Result Vector Scalarization: <1 x ty> -> ty. 
+//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue R = SDValue(); + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to scalarize the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; + case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; + case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; + case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; + case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; + case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; + case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; + case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; + case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; + case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; + case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; + case ISD::ANY_EXTEND: + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + R = ScalarizeVecRes_UnaryOp(N); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::FADD: + case ISD::FCOPYSIGN: + case ISD::FDIV: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + + case ISD::FPOW: + case ISD::FREM: + case ISD::FSUB: + case ISD::MUL: + case ISD::OR: + case ISD::SDIV: + case ISD::SREM: + case ISD::SUB: + case ISD::UDIV: + case ISD::UREM: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + R = ScalarizeVecRes_BinOp(N); + break; + case ISD::FMA: + R = ScalarizeVecRes_TernaryOp(N); + break; + } + + // If R is null, the sub-method took care of registering the result. 
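+ // Otherwise record the scalarized value so later uses of this one-element
+ // vector result are remapped to it.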
+ if (R.getNode()) + SetScalarizedVector(SDValue(N, ResNo), R); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + LHS.getValueType(), LHS, RHS, N->getFlags()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + SDValue Op1 = GetScalarizedVector(N->getOperand(1)); + SDValue Op2 = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + Op0.getValueType(), Op0, Op1, Op2); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, + unsigned ResNo) { + SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); + return GetScalarizedVector(Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + NewVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + // The BUILD_VECTOR operands may be of wider element types and + // we may need to truncate them back to the requested return type. + if (EltVT.isInteger()) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op0 = GetScalarizedVector(N->getOperand(0)); + return DAG.getConvertRndSat(NewVT, SDLoc(N), + Op0, DAG.getValueType(NewVT), + DAG.getValueType(Op0.getValueType()), + N->getOperand(3), + N->getOperand(4), + cast<CvtRndSatSDNode>(N)->getCvtCode()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0).getVectorElementType(), + N->getOperand(0), N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), + NewVT, Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FPOWI, SDLoc(N), + Op.getValueType(), Op, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { + // The value to insert may have a wider type than the vector element type, + // so be sure to truncate it to the element type if necessary. + SDValue Op = N->getOperand(1); + EVT EltVT = N->getValueType(0).getVectorElementType(); + if (Op.getValueType() != EltVT) + // FIXME: Can this happen for floating point types? 
+ Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); + return Op; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { + assert(N->isUnindexed() && "Indexed vector load?"); + + SDValue Result = DAG.getLoad(ISD::UNINDEXED, + N->getExtensionType(), + N->getValueType(0).getVectorElementType(), + SDLoc(N), + N->getChain(), N->getBasePtr(), + DAG.getUNDEF(N->getBasePtr().getValueType()), + N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->isInvariant(), N->getOriginalAlignment(), + N->getAAInfo()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); + return Result; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { + // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. + EVT DestVT = N->getValueType(0).getVectorElementType(); + SDValue Op = N->getOperand(0); + EVT OpVT = Op.getValueType(); + SDLoc DL(N); + // The result needs scalarizing, but it's not a given that the source does. + // This is a workaround for targets where it's impossible to scalarize the + // result of a conversion, because the source type is legal. + // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} + // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is + // legal and was not scalarized. + // See the similar logic in ScalarizeVecRes_VSETCC + if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + Op = GetScalarizedVector(Op); + } else { + EVT VT = OpVT.getVectorElementType(); + Op = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { + EVT EltVT = N->getValueType(0).getVectorElementType(); + EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, + LHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { + // If the operand is wider than the vector element type then it is implicitly + // truncated. Make that explicit here. + EVT EltVT = N->getValueType(0).getVectorElementType(); + SDValue InOp = N->getOperand(0); + if (InOp.getValueType() != EltVT) + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); + return InOp; +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { + SDValue Cond = GetScalarizedVector(N->getOperand(0)); + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. 
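+ // The fixup below converts between the two conventions: mask the condition
+ // down to bit 0 if the scalar select expects 0/1, or sign-extend bit 0 if it
+ // expects 0/-1.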
+ if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + + if (ScalarBool != VecBool) { + EVT CondVT = Cond.getValueType(); + switch (ScalarBool) { + case TargetLowering::UndefinedBooleanContent: + break; + case TargetLowering::ZeroOrOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); + // Vector read from all ones, scalar expects a single 1 so mask. + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, + Cond, DAG.getConstant(1, SDLoc(N), CondVT)); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + assert(VecBool == TargetLowering::UndefinedBooleanContent || + VecBool == TargetLowering::ZeroOrOneBooleanContent); + // Vector reads from a one, scalar from all ones so sign extend. + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, + Cond, DAG.getValueType(MVT::i1)); + break; + } + } + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(1)); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { + SDValue LHS = GetScalarizedVector(N->getOperand(2)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), + N->getOperand(0), N->getOperand(1), + LHS, GetScalarizedVector(N->getOperand(3)), + N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + + if (N->getValueType(0).isVector()) return ScalarizeVecRes_VSETCC(N); + + SDValue LHS = GetScalarizedVector(N->getOperand(0)); + SDValue RHS = GetScalarizedVector(N->getOperand(1)); + SDLoc DL(N); + + // Turn it into a scalar SETCC. + return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { + // Figure out if the scalar is the LHS or RHS and return it. + SDValue Arg = N->getOperand(2).getOperand(0); + if (Arg.getOpcode() == ISD::UNDEF) + return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); + unsigned Op = !cast<ConstantSDNode>(Arg)->isNullValue(); + return GetScalarizedVector(N->getOperand(Op)); +} + +SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + EVT OpVT = LHS.getValueType(); + EVT NVT = N->getValueType(0).getVectorElementType(); + SDLoc DL(N); + + // The result needs scalarizing, but it's not a given that the source does. 
+ if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { + LHS = GetScalarizedVector(LHS); + RHS = GetScalarizedVector(RHS); + } else { + EVT VT = OpVT.getVectorElementType(); + LHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + RHS = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + // Turn it into a scalar SETCC. + SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, + N->getOperand(2)); + // Vectors may have a different boolean contents to scalars. Promote the + // value appropriately. + ISD::NodeType ExtendCode = + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); + return DAG.getNode(ExtendCode, DL, NVT, Res); +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Scalarization <1 x ty> -> ty. +//===----------------------------------------------------------------------===// + +bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to scalarize this operator's operand!"); + case ISD::BITCAST: + Res = ScalarizeVecOp_BITCAST(N); + break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::TRUNCATE: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + Res = ScalarizeVecOp_UnaryOp(N); + break; + case ISD::CONCAT_VECTORS: + Res = ScalarizeVecOp_CONCAT_VECTORS(N); + break; + case ISD::EXTRACT_VECTOR_ELT: + Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); + break; + case ISD::VSELECT: + Res = ScalarizeVecOp_VSELECT(N); + break; + case ISD::STORE: + Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::FP_ROUND: + Res = ScalarizeVecOp_FP_ROUND(N, OpNo); + break; + } + } + + // If the result is null, the sub-method took care of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::BITCAST, SDLoc(N), + N->getValueType(0), Elt); +} + +/// ScalarizeVecOp_UnaryOp - If the input is a vector that needs to be +/// scalarized, it must be <1 x ty>. Do the operation on the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), + N->getValueType(0).getScalarType(), Elt); + // Revectorize the result so the types line up with what the uses of this + // expression expect. 
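+ // The result type here is itself a one-element vector, so a single-operand
+ // BUILD_VECTOR recreates it.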
+ return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Op); +} + +/// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - +/// use a BUILD_VECTOR instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { + SmallVector<SDValue, 8> Ops(N->getNumOperands()); + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) + Ops[i] = GetScalarizedVector(N->getOperand(i)); + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), Ops); +} + +/// ScalarizeVecOp_EXTRACT_VECTOR_ELT - If the input is a vector that needs to +/// be scalarized, it must be <1 x ty>, so just return the element, ignoring the +/// index. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Res = GetScalarizedVector(N->getOperand(0)); + if (Res.getValueType() != N->getValueType(0)) + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), + Res); + return Res; +} + + +/// ScalarizeVecOp_VSELECT - If the input condition is a vector that needs to be +/// scalarized, it must be <1 x i1>, so just convert to a normal ISD::SELECT +/// (still with vector output type since that was acceptable if we got here). +SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { + SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); + EVT VT = N->getValueType(0); + + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), + N->getOperand(2)); +} + +/// ScalarizeVecOp_STORE - If the value to store is a vector that needs to be +/// scalarized, it must be <1 x ty>. Just store the element. +SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ + assert(N->isUnindexed() && "Indexed store of one-element vector?"); + assert(OpNo == 1 && "Do not know how to scalarize this operand!"); + SDLoc dl(N); + + if (N->isTruncatingStore()) + return DAG.getTruncStore(N->getChain(), dl, + GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->getMemoryVT().getVectorElementType(), + N->isVolatile(), N->isNonTemporal(), + N->getAlignment(), N->getAAInfo()); + + return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), + N->getBasePtr(), N->getPointerInfo(), + N->isVolatile(), N->isNonTemporal(), + N->getOriginalAlignment(), N->getAAInfo()); +} + +/// ScalarizeVecOp_FP_ROUND - If the value to round is a vector that needs +/// to be scalarized, it must be <1 x ty>. Convert the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), + N->getValueType(0).getVectorElementType(), Elt, + N->getOperand(1)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); +} + +//===----------------------------------------------------------------------===// +// Result Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorResult - This method is called when the specified result of the +/// specified node is found to need vector splitting. At this point, the node +/// may also have invalid operands or may have other results that need +/// legalization, we just know that (at least) one result needs vector +/// splitting. +void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Split node result: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Lo, Hi; + + // See if the target wants to custom expand this node. 
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) + return; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split the result of this " + "operator!\n"); + + case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; + case ISD::VSELECT: + case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; + case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; + case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; + case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; + case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; + case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; + case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; + case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; + case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break; + case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; + case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break; + case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; + case ISD::LOAD: + SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); + break; + case ISD::MLOAD: + SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); + break; + case ISD::MGATHER: + SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi); + break; + case ISD::SETCC: + SplitVecRes_SETCC(N, Lo, Hi); + break; + case ISD::VECTOR_SHUFFLE: + SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); + break; + + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CONVERT_RNDSAT: + case ISD::CTLZ: + case ISD::CTTZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + SplitVecRes_UnaryOp(N, Lo, Hi); + break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SDIV: + case ISD::UDIV: + case ISD::FDIV: + case ISD::FPOW: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::UREM: + case ISD::SREM: + case ISD::FREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + SplitVecRes_BinOp(N, Lo, Hi); + break; + case ISD::FMA: + SplitVecRes_TernaryOp(N, Lo, Hi); + break; + } + + // If Lo/Hi is null, the sub-method took care of registering results etc. 
+ if (Lo.getNode()) + SetSplitVector(SDValue(N, ResNo), Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDValue RHSLo, RHSHi; + GetSplitVector(N->getOperand(1), RHSLo, RHSHi); + SDLoc dl(N); + + const SDNodeFlags *Flags = N->getFlags(); + unsigned Opcode = N->getOpcode(); + Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); + Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); +} + +void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Op0Lo, Op0Hi; + GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); + SDValue Op1Lo, Op1Hi; + GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); + SDValue Op2Lo, Op2Hi; + GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); + SDLoc dl(N); + + Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), + Op0Lo, Op1Lo, Op2Lo); + Hi = DAG.getNode(N->getOpcode(), dl, Op0Hi.getValueType(), + Op0Hi, Op1Hi, Op2Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // We know the result is a vector. The input may be either a vector or a + // scalar value. + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); + + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + + // Handle some special cases efficiently. + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + case TargetLowering::TypePromoteInteger: + case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeWidenVector: + break; + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + // A scalar to vector conversion, where the scalar needs expansion. + // If the vector is being split in two then we can just convert the + // expanded pieces. + if (LoVT == HiVT) { + GetExpandedOp(InOp, Lo, Hi); + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + break; + case TargetLowering::TypeSplitVector: + // If the input is a vector that needs to be split, convert each split + // piece of the input now. + GetSplitVector(InOp, Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); + return; + } + + // In the general case, convert the input to an integer and split it by hand. 
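+ // On big-endian targets the first (Lo) half of the vector occupies the
+ // most-significant bits of that integer, hence the swaps below.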
+ EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); + EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); + if (DAG.getDataLayout().isBigEndian()) + std::swap(LoIntVT, HiIntVT); + + SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); + Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + unsigned LoNumElts = LoVT.getVectorNumElements(); + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); + Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, LoOps); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); + Hi = DAG.getNode(ISD::BUILD_VECTOR, dl, HiVT, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, + SDValue &Hi) { + assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); + SDLoc dl(N); + unsigned NumSubvectors = N->getNumOperands() / 2; + if (NumSubvectors == 1) { + Lo = N->getOperand(0); + Hi = N->getOperand(1); + return; + } + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); + Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); + + SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); + Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); +} + +void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new subvector into the specified index. + SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, dl, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. 
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); +} + +void DAGTypeLegalizer::SplitVecRes_FCOPYSIGN(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc DL(N); + + SDValue RHSLo, RHSHi; + SDValue RHS = N->getOperand(1); + EVT RHSVT = RHS.getValueType(); + if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) + GetSplitVector(RHS, RHSLo, RHSHi); + else + std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); + + + Lo = DAG.getNode(ISD::FCOPYSIGN, DL, LHSLo.getValueType(), LHSLo, RHSLo); + Hi = DAG.getNode(ISD::FCOPYSIGN, DL, LHSHi.getValueType(), LHSHi, RHSHi); +} + +void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue LHSLo, LHSHi; + GetSplitVector(N->getOperand(0), LHSLo, LHSHi); + SDLoc dl(N); + + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); + + Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, + DAG.getValueType(LoVT)); + Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, + DAG.getValueType(HiVT)); +} + +void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue Elt = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { + unsigned IdxVal = CIdx->getZExtValue(); + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + if (IdxVal < LoNumElts) + Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, + Lo.getValueType(), Lo, Elt, Idx); + else + Hi = + DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, + DAG.getConstant(IdxVal - LoNumElts, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + return; + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return; + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new element. This may be larger than the vector element type, + // so use a truncating store. + SDValue EltPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = DAG.getDataLayout().getPrefTypeAlignment(VecType); + Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT, + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, dl, + StackPtr.getValueType())); + + // Load the Hi part from the stack slot. 
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + +void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); + Hi = DAG.getUNDEF(HiVT); +} + +void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, + SDValue &Hi) { + assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + ISD::LoadExtType ExtType = LD->getExtensionType(); + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); + EVT MemoryVT = LD->getMemoryVT(); + unsigned Alignment = LD->getOriginalAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, + isInvariant, Alignment, AAInfo); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, + LD->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(LD, 1), Ch); +} + +void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MLD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); + + SDValue Ch = MLD->getChain(); + SDValue Ptr = MLD->getBasePtr(); + SDValue Mask = MLD->getMask(); + SDValue Src0 = MLD->getSrc0(); + unsigned Alignment = MLD->getOriginalAlignment(); + ISD::LoadExtType ExtType = MLD->getExtensionType(); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MLD->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). 
+ getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MLD->getAAInfo(), MLD->getRanges()); + + Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, + ExtType); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, dl, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MLD->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); + + Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, + ExtType); + + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MLD, 1), Ch); + +} + +void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, + SDValue &Lo, SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + SDValue Index = MGT->getIndex(); + unsigned Alignment = MGT->getOriginalAlignment(); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + // Split MemoryVT + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo, + MMO); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi, + MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(MGT, 1), Ch); +} + + +void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + + EVT LoVT, HiVT; + SDLoc DL(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the input. 
+ SDValue LL, LH, RL, RH; + std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); + + Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); + Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); +} + +void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + EVT LoVT, HiVT; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. + EVT InVT = N->getOperand(0).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), Lo, Hi); + else + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if (N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } +} + +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. 
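+ // E.g. (hypothetical target) v8i8 -> v8i32 where v4i8 is illegal but v8i16
+ // and v4i16 are legal: extend to v8i16 here, split into two v4i16 halves,
+ // then extend each half to v4i32 below.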
+ SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + +void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, + SDValue &Lo, SDValue &Hi) { + // The low and high parts of the original input give four input vectors. + SDValue Inputs[4]; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); + GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); + EVT NewVT = Inputs[0].getValueType(); + unsigned NewElts = NewVT.getVectorNumElements(); + + // If Lo or Hi uses elements from at most two of the four input vectors, then + // express it as a vector shuffle of those two inputs. Otherwise extract the + // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. + SmallVector<int, 16> Ops; + for (unsigned High = 0; High < 2; ++High) { + SDValue &Output = High ? Hi : Lo; + + // Build a shuffle mask for the output, discovering on the fly which + // input vectors to use as shuffle operands (recorded in InputUsed). + // If building a suitable shuffle vector proves too hard, then bail + // out with useBuildVector set. + unsigned InputUsed[2] = { -1U, -1U }; // Not yet discovered. + unsigned FirstMaskIdx = High * NewElts; + bool useBuildVector = false; + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element does not index into any input vector. + Ops.push_back(-1); + continue; + } + + // Turn the index into an offset from the start of the input vector. + Idx -= Input * NewElts; + + // Find or create a shuffle vector operand to hold this input. + unsigned OpNo; + for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) { + if (InputUsed[OpNo] == Input) { + // This input vector is already an operand. + break; + } else if (InputUsed[OpNo] == -1U) { + // Create a new operand for this input vector. + InputUsed[OpNo] = Input; + break; + } + } + + if (OpNo >= array_lengthof(InputUsed)) { + // More than two input vectors used! Give up on trying to create a + // shuffle vector. Insert all elements into a BUILD_VECTOR instead. + useBuildVector = true; + break; + } + + // Add the mask index for the new shuffle vector. + Ops.push_back(Idx + OpNo * NewElts); + } + + if (useBuildVector) { + EVT EltVT = NewVT.getVectorElementType(); + SmallVector<SDValue, 16> SVOps; + + // Extract the input elements by hand. + for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) { + // The mask element. This indexes into the input. + int Idx = N->getMaskElt(FirstMaskIdx + MaskOffset); + + // The input vector this mask element indexes into. + unsigned Input = (unsigned)Idx / NewElts; + + if (Input >= array_lengthof(Inputs)) { + // The mask element is "undef" or indexes off the end of the input. + SVOps.push_back(DAG.getUNDEF(EltVT)); + continue; + } + + // Turn the index into an offset from the start of the input vector. 
+ Idx -= Input * NewElts; + + // Extract the vector element by hand. + SVOps.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Inputs[Input], + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + + // Construct the Lo/Hi output using a BUILD_VECTOR. + Output = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, SVOps); + } else if (InputUsed[0] == -1U) { + // No input vectors were used! The result is undefined. + Output = DAG.getUNDEF(NewVT); + } else { + SDValue Op0 = Inputs[InputUsed[0]]; + // If only one input was used, use an undefined vector for the other. + SDValue Op1 = InputUsed[1] == -1U ? + DAG.getUNDEF(NewVT) : Inputs[InputUsed[1]]; + // At least one input vector was used. Create a new shuffle vector. + Output = DAG.getVectorShuffle(NewVT, dl, Op0, Op1, &Ops[0]); + } + + Ops.clear(); + } +} + + +//===----------------------------------------------------------------------===// +// Operand Vector Splitting +//===----------------------------------------------------------------------===// + +/// SplitVectorOperand - This method is called when the specified operand of the +/// specified node is found to need vector splitting. At this point, all of the +/// result types of the node are known to be legal, but other operands of the +/// node may need legalization as well as the specified one. +bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Split node operand: "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + if (!Res.getNode()) { + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + report_fatal_error("Do not know how to split this operator's " + "operand!\n"); + + case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; + case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; + case ISD::TRUNCATE: + Res = SplitVecOp_TruncateHelper(N); + break; + case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; + case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break; + case ISD::STORE: + Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); + break; + case ISD::MSTORE: + Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); + break; + case ISD::MSCATTER: + Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo); + break; + case ISD::MGATHER: + Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo); + break; + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + if (N->getValueType(0).bitsLT(N->getOperand(0)->getValueType(0))) + Res = SplitVecOp_TruncateHelper(N); + else + Res = SplitVecOp_UnaryOp(N); + break; + case ISD::CTTZ: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::FP_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::ANY_EXTEND: + case ISD::FTRUNC: + Res = SplitVecOp_UnaryOp(N); + break; + } + } + + // If the result is null, the sub-method took care 
of registering results etc. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. + assert(OpNo == 0 && "Illegal operand must be mask"); + + SDValue Mask = N->getOperand(0); + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + EVT Src0VT = Src0.getValueType(); + SDLoc DL(N); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); + + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Lo and Hi have differing types"); + + EVT LoOpVT, HiOpVT; + std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); + + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); + + SDValue LoSelect = + DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); + SDValue HiSelect = + DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); +} + +SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc dl(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { + // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will + // end up being split all the way down to individual components. Convert the + // split pieces into integers and reassemble. + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + Lo = BitConvertToInteger(Lo); + Hi = BitConvertToInteger(Hi); + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), + JoinIntegers(Lo, Hi)); +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + // We know that the extracted result type is legal. 
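+  // The operand has already been split into Lo/Hi halves, so the constant
+  // start index selects whichever half holds the subvector; the assert below
+  // guarantees the extract never straddles the split point.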
+ EVT SubVT = N->getValueType(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + + if (IdxVal < LoElts) { + assert(IdxVal + SubVT.getVectorNumElements() <= LoElts && + "Extracted subvector crosses vector split!"); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); + } else { + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, + DAG.getConstant(IdxVal - LoElts, dl, + Idx.getValueType())); + } +} + +SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue Vec = N->getOperand(0); + SDValue Idx = N->getOperand(1); + EVT VecVT = Vec.getValueType(); + + if (isa<ConstantSDNode>(Idx)) { + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!"); + + SDValue Lo, Hi; + GetSplitVector(Vec, Lo, Hi); + + uint64_t LoElts = Lo.getValueType().getVectorNumElements(); + + if (IdxVal < LoElts) + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, + DAG.getConstant(IdxVal - LoElts, SDLoc(N), + Idx.getValueType())), 0); + } + + // See if the target wants to custom expand this node. + if (CustomLowerNode(N, N->getValueType(0), true)) + return SDValue(); + + // Make the vector elements byte-addressable if they aren't already. + SDLoc dl(N); + EVT EltVT = VecVT.getVectorElementType(); + if (EltVT.getSizeInBits() < 8) { + SmallVector<SDValue, 4> ElementOps; + for (unsigned i = 0; i < VecVT.getVectorNumElements(); ++i) { + ElementOps.push_back(DAG.getAnyExtOrTrunc( + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, + DAG.getConstant(i, dl, MVT::i8)), + dl, MVT::i8)); + } + + EltVT = MVT::i8; + VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + VecVT.getVectorNumElements()); + Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, ElementOps); + } + + // Store the vector to the stack. + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Load back the required element. 
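+  // GetVectorElementPointer indexes off the stack slot using the (possibly
+  // promoted) element type, and the extending load produces the node's
+  // original result type even when the elements were widened to i8 above.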
+ StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); + return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + MachinePointerInfo(), EltVT, false, false, false, 0); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, + unsigned OpNo) { + EVT LoVT, HiVT; + SDLoc dl(MGT); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0)); + + SDValue Ch = MGT->getChain(); + SDValue Ptr = MGT->getBasePtr(); + SDValue Index = MGT->getIndex(); + SDValue Mask = MGT->getMask(); + SDValue Src0 = MGT->getValue(); + unsigned Alignment = MGT->getOriginalAlignment(); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + + EVT MemoryVT = MGT->getMemoryVT(); + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue Src0Lo, Src0Hi; + if (getTypeAction(Src0.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Src0, Src0Lo, Src0Hi); + else + std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, dl); + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl); + + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), MGT->getRanges()); + + SDValue OpsLo[] = {Ch, Src0Lo, MaskLo, Ptr, IndexLo}; + SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, + OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(MGT->getPointerInfo(), + MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), + Alignment, MGT->getAAInfo(), + MGT->getRanges()); + + SDValue OpsHi[] = {Ch, Src0Hi, MaskHi, Ptr, IndexHi}; + SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, + OpsHi, MMO); + + // Build a factor node to remember that this load is independent of the + // other one. + Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), + Hi.getValue(1)); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. 
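+  // Ch is the TokenFactor of both half-gathers, so existing users of the
+  // gather's chain now depend on both new loads.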
+ ReplaceValueWith(SDValue(MGT, 1), Ch); + + SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo, + Hi); + ReplaceValueWith(SDValue(MGT, 0), Res); + return SDValue(); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + MaskLo = PromoteTargetBoolean(MaskLo, DataLo.getValueType()); + MaskHi = PromoteTargetBoolean(MaskHi, DataHi.getValueType()); + + // if Alignment is equal to the vector size, + // take the half of it for the second part + unsigned SecondHalfAlignment = + (Alignment == Data->getValueType(0).getSizeInBits()/8) ? + Alignment/2 : Alignment; + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, + N->isTruncatingStore()); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + SecondHalfAlignment, N->getAAInfo(), N->getRanges()); + + Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, + N->isTruncatingStore()); + + // Build a factor node to remember that this store is independent of the + // other one. 
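+  // Returning the TokenFactor as the new chain leaves the two masked stores
+  // unordered with respect to each other.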
+ return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N, + unsigned OpNo) { + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + SDValue Mask = N->getMask(); + SDValue Index = N->getIndex(); + SDValue Data = N->getValue(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + SDLoc DL(N); + + // Split all operands + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + SDValue DataLo, DataHi; + if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) + // Split Data operand + GetSplitVector(Data, DataLo, DataHi); + else + std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); + + SDValue MaskLo, MaskHi; + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + // Split Mask operand + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); + + SDValue IndexHi, IndexLo; + if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Index, IndexLo, IndexHi); + else + std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL); + + SDValue Lo, Hi; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, LoMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo}; + Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(), + DL, OpsLo, MMO); + + MMO = DAG.getMachineFunction(). + getMachineMemOperand(N->getPointerInfo(), + MachineMemOperand::MOStore, HiMemVT.getStoreSize(), + Alignment, N->getAAInfo(), N->getRanges()); + + SDValue OpsHi[] = {Ch, DataHi, MaskHi, Ptr, IndexHi}; + Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(), + DL, OpsHi, MMO); + + // Build a factor node to remember that this store is independent of the + // other one. + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { + assert(N->isUnindexed() && "Indexed store of vector?"); + assert(OpNo == 1 && "Can only split the stored value"); + SDLoc DL(N); + + bool isTruncating = N->isTruncatingStore(); + SDValue Ch = N->getChain(); + SDValue Ptr = N->getBasePtr(); + EVT MemoryVT = N->getMemoryVT(); + unsigned Alignment = N->getOriginalAlignment(); + bool isVol = N->isVolatile(); + bool isNT = N->isNonTemporal(); + AAMDNodes AAInfo = N->getAAInfo(); + SDValue Lo, Hi; + GetSplitVector(N->getOperand(1), Lo, Hi); + + EVT LoMemVT, HiMemVT; + std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); + + unsigned IncrementSize = LoMemVT.getSizeInBits()/8; + + if (isTruncating) + Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), + LoMemVT, isVol, isNT, Alignment, AAInfo); + else + Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), + isVol, isNT, Alignment, AAInfo); + + // Increment the pointer to the other half. 
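+  // IncrementSize is the in-memory size of the low half, so the high half is
+  // stored at Ptr + IncrementSize with a matching pointer-info offset.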
+ Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, + DAG.getConstant(IncrementSize, DL, Ptr.getValueType())); + + if (isTruncating) + Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + HiMemVT, isVol, isNT, Alignment, AAInfo); + else + Hi = DAG.getStore(Ch, DL, Hi, Ptr, + N->getPointerInfo().getWithOffset(IncrementSize), + isVol, isNT, Alignment, AAInfo); + + return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { + SDLoc DL(N); + + // The input operands all must have the same type, and we know the result + // type is valid. Convert this to a buildvector which extracts all the + // input elements. + // TODO: If the input elements are power-two vectors, we could convert this to + // a new CONCAT_VECTORS node with elements that are half-wide. + SmallVector<SDValue, 32> Elts; + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (const SDValue &Op : N->op_values()) { + for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); + i != e; ++i) { + Elts.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); + } + } + + return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0), Elts); +} + +SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { + // The result type is legal, but the input type is illegal. If splitting + // ends up with the result type of each half still being legal, just + // do that. If, however, that would result in an illegal result type, + // we can try to get more clever with power-two vectors. Specifically, + // split the input type, but also widen the result element size, then + // concatenate the halves and truncate again. For example, consider a target + // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit + // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do: + // %inlo = v4i32 extract_subvector %in, 0 + // %inhi = v4i32 extract_subvector %in, 4 + // %lo16 = v4i16 trunc v4i32 %inlo + // %hi16 = v4i16 trunc v4i32 %inhi + // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16 + // %res = v8i8 trunc v8i16 %in16 + // + // Without this transform, the original truncate would end up being + // scalarized, which is pretty much always a last resort. + SDValue InVec = N->getOperand(0); + EVT InVT = InVec->getValueType(0); + EVT OutVT = N->getValueType(0); + unsigned NumElements = OutVT.getVectorNumElements(); + bool IsFloat = OutVT.isFloatingPoint(); + + // Widening should have already made sure this is a power-two vector + // if we're trying to split it at all. assert() that's true, just in case. + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + + unsigned InElementSize = InVT.getVectorElementType().getSizeInBits(); + unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits(); + + // If the input elements are only 1/2 the width of the result elements, + // just use the normal splitting. Our trick only work if there's room + // to split more than once. + if (InElementSize <= OutElementSize * 2) + return SplitVecOp_UnaryOp(N); + SDLoc DL(N); + + // Extract the halves of the input via extract_subvector. + SDValue InLoVec, InHiVec; + std::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); + // Truncate them to 1/2 the element size. + EVT HalfElementVT = IsFloat ? 
+ EVT::getFloatingPointVT(InElementSize/2) : + EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); + EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, + NumElements/2); + SDValue HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec); + SDValue HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec); + // Concatenate them to get the full intermediate truncation result. + EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); + SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, + HalfHi); + // Now finish up by truncating all the way down to the original result + // type. This should normally be something that ends up being legal directly, + // but in theory if a target has very wide vectors and an annoyingly + // restricted set of legal types, this split can chain to build things up. + return IsFloat + ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, + DAG.getTargetConstant( + 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) + : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); +} + +SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operand types must be vectors"); + // The result has a legal vector type, but the input needs splitting. + SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; + SDLoc DL(N); + GetSplitVector(N->getOperand(0), Lo0, Hi0); + GetSplitVector(N->getOperand(1), Lo1, Hi1); + unsigned PartElements = Lo0.getValueType().getVectorNumElements(); + EVT PartResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, PartElements); + EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 2*PartElements); + + LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); + HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); + SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); + return PromoteTargetBoolean(Con, N->getValueType(0)); +} + + +SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { + // The result has a legal vector type, but the input needs splitting. + EVT ResVT = N->getValueType(0); + SDValue Lo, Hi; + SDLoc DL(N); + GetSplitVector(N->getOperand(0), Lo, Hi); + EVT InVT = Lo.getValueType(); + + EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), + InVT.getVectorNumElements()); + + Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); +} + +SDValue DAGTypeLegalizer::SplitVecOp_FCOPYSIGN(SDNode *N) { + // The result (and the first input) has a legal vector type, but the second + // input needs splitting. + return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); +} + + +//===----------------------------------------------------------------------===// +// Result Vector Widening +//===----------------------------------------------------------------------===// + +void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { + DEBUG(dbgs() << "Widen node result " << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + + // See if the target wants to custom widen this node. 
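+  // If the target custom-widens this node, the custom code is responsible for
+  // the widened result and there is nothing more to do here.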
+ if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorResult #" << ResNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen the result of this operator!"); + + case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; + case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; + case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; + case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; + case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; + case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break; + case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; + case ISD::VSELECT: + case ISD::SELECT: Res = WidenVecRes_SELECT(N); break; + case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; + case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; + case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; + case ISD::VECTOR_SHUFFLE: + Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); + break; + case ISD::MLOAD: + Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); + break; + case ISD::MGATHER: + Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); + break; + + case ISD::ADD: + case ISD::AND: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNAN: + case ISD::FMAXNAN: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + Res = WidenVecRes_Binary(N); + break; + + case ISD::FADD: + case ISD::FMUL: + case ISD::FPOW: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + Res = WidenVecRes_BinaryCanTrap(N); + break; + + case ISD::FCOPYSIGN: + Res = WidenVecRes_FCOPYSIGN(N); + break; + + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + Res = WidenVecRes_Shift(N); + break; + + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: + Res = WidenVecRes_Convert(N); + break; + + case ISD::BITREVERSE: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTPOP: + case ISD::CTTZ: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FCOS: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FROUND: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + Res = WidenVecRes_Unary(N); + break; + case ISD::FMA: + Res = WidenVecRes_Ternary(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (Res.getNode()) + SetWidenedVector(SDValue(N, ResNo), Res); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { + // Ternary op widening. 
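+  // Only FMA is routed here: all three operands are widened and the node is
+  // rebuilt at the widened type.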
+ SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + SDValue InOp3 = GetWidenedVector(N->getOperand(2)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { + // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. + unsigned Opcode = N->getOpcode(); + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT WidenEltVT = WidenVT.getVectorElementType(); + EVT VT = WidenVT; + unsigned NumElts = VT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); + while (!TLI.isTypeLegal(VT) && NumElts != 1) { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } + + if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { + // Operation doesn't trap so just widen as normal. + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); + } + + // No legal vector version so unroll the vector operation and then widen. + if (NumElts == 1) + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); + + // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); + + SmallVector<SDValue, 16> ConcatOps(CurNumElts); + unsigned ConcatEnd = 0; // Current ConcatOps index. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest legal vector size (at most WidenVT) + // while (orig. 
vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } + while (CurNumElts != 0) { + while (CurNumElts >= NumElts) { + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); + Idx += NumElts; + CurNumElts -= NumElts; + } + do { + NumElts = NumElts / 2; + VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); + } while (!TLI.isTypeLegal(VT) && NumElts != 1); + + if (NumElts == 1) { + for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { + SDValue EOp1 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + SDValue EOp2 = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2, Flags); + } + CurNumElts = 0; + } + } + + // Check to see if we have a single operation with the widen type. + if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; + VT = ConcatOps[Idx--].getValueType(); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeLegal(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, ConcatOps[OpIdx], + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + ConcatOps[Idx+1] = VecOp; + ConcatEnd = Idx + 2; + } else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, SubConcatOps); + ConcatEnd = SubConcatIdx + 1; + } + } + + // Check to see if we have a single operation with the widen type. 
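+  // If everything collapsed into one value that already has the widened type,
+  // return it directly instead of building a CONCAT_VECTORS.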
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } + + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); + if (NumOps != ConcatEnd ) { + SDValue UndefVal = DAG.getUNDEF(MaxVT); + for (unsigned j = ConcatEnd; j < NumOps; ++j) + ConcatOps[j] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(ConcatOps.data(), NumOps)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { + SDValue InOp = N->getOperand(0); + SDLoc DL(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); + + unsigned Opcode = N->getOpcode(); + unsigned InVTNumElts = InVT.getVectorNumElements(); + const SDNodeFlags *Flags = N->getFlags(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(N->getOperand(0)); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) { + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InOp); + return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); + } + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVec); + return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); + } + + if (InVTNumElts % WidenNumElts == 0) { + SDValue InVal = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + // Extract the input and convert the shorten input vector. + if (N->getNumOperands() == 1) + return DAG.getNode(Opcode, DL, WidenVT, InVal); + return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
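+  // Each source element is extracted and converted individually; lanes beyond
+  // the original element count are filled with UNDEF.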
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + if (N->getNumOperands() == 1) + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); + else + Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { + // If this is an FCOPYSIGN with same input types, we can treat it as a + // normal (can trap) binary op. + if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) + return WidenVecRes_BinaryCanTrap(N); + + // If the types are different, fall back to unrolling. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); +} + +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + + EVT ShVT = ShOp.getValueType(); + if (getTypeAction(ShVT) == TargetLowering::TypeWidenVector) { + ShOp = GetWidenedVector(ShOp); + ShVT = ShOp.getValueType(); + } + EVT ShWidenVT = EVT::getVectorVT(*DAG.getContext(), + ShVT.getVectorElementType(), + WidenVT.getVectorNumElements()); + if (ShVT != ShWidenVT) + ShOp = ModifyToType(ShOp, ShWidenVT); + + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { + // Unary op widening. 
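+  // Widen the single vector operand and re-issue the same opcode at the
+  // widened type.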
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); +} + +SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), + cast<VTSDNode>(N->getOperand(1))->getVT() + .getVectorElementType(), + WidenVT.getVectorNumElements()); + SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, WidenLHS, DAG.getValueType(ExtVT)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { + SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); + return GetWidenedVector(WidenVec); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { + SDValue InOp = N->getOperand(0); + EVT InVT = InOp.getValueType(); + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDLoc dl(N); + + switch (getTypeAction(InVT)) { + case TargetLowering::TypeLegal: + break; + case TargetLowering::TypePromoteInteger: + // If the incoming type is a vector that is being promoted, then + // we know that the elements are arranged differently and that we + // must perform the conversion using a stack slot. + if (InVT.isVector()) + break; + + // If the InOp is promoted to the same size, convert it. Otherwise, + // fall out of the switch and widen the promoted input. + InOp = GetPromotedInteger(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + case TargetLowering::TypeSoftenFloat: + case TargetLowering::TypePromoteFloat: + case TargetLowering::TypeExpandInteger: + case TargetLowering::TypeExpandFloat: + case TargetLowering::TypeScalarizeVector: + case TargetLowering::TypeSplitVector: + break; + case TargetLowering::TypeWidenVector: + // If the InOp is widened to the same size, convert it. Otherwise, fall + // out of the switch and widen the widened input. + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + if (WidenVT.bitsEq(InVT)) + // The input widens to the same size. Convert to the widen value. + return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); + break; + } + + unsigned WidenSize = WidenVT.getSizeInBits(); + unsigned InSize = InVT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. + if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) { + // Determine new input vector type. The new input vector type will use + // the same element type (if its a vector) or use the input type as a + // vector. It is the same size as the type to widen to. + EVT NewInVT; + unsigned NewNumElts = WidenSize / InSize; + if (InVT.isVector()) { + EVT InEltVT = InVT.getVectorElementType(); + NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, + WidenSize / InEltVT.getSizeInBits()); + } else { + NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); + } + + if (TLI.isTypeLegal(NewInVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. 
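+      // Pad the input with UNDEFs up to the widened bit width and bitcast the
+      // whole aggregate to the widened result type.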
+ SmallVector<SDValue, 16> Ops(NewNumElts); + SDValue UndefVal = DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i < NewNumElts; ++i) + Ops[i] = UndefVal; + + SDValue NewVec; + if (InVT.isVector()) + NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); + else + NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); + } + } + + return CreateStackStoreLoad(InOp, WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { + SDLoc dl(N); + // Build a vector with undefined for the new nodes. + EVT VT = N->getValueType(0); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. + EVT EltVT = N->getOperand(0).getValueType(); + unsigned NumElts = VT.getVectorNumElements(); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SmallVector<SDValue, 16> NewOps(N->op_begin(), N->op_end()); + assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); + NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, NewOps); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { + EVT InVT = N->getOperand(0).getValueType(); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDLoc dl(N); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + unsigned NumInElts = InVT.getVectorNumElements(); + unsigned NumOperands = N->getNumOperands(); + + bool InputWidened = false; // Indicates we need to widen the input. + if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { + if (WidenVT.getVectorNumElements() % InVT.getVectorNumElements() == 0) { + // Add undef vectors to widen to correct length. + unsigned NumConcat = WidenVT.getVectorNumElements() / + InVT.getVectorNumElements(); + SDValue UndefVal = DAG.getUNDEF(InVT); + SmallVector<SDValue, 16> Ops(NumConcat); + for (unsigned i=0; i < NumOperands; ++i) + Ops[i] = N->getOperand(i); + for (unsigned i = NumOperands; i != NumConcat; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); + } + } else { + InputWidened = true; + if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { + // The inputs and the result are widen to the same value. + unsigned i; + for (i=1; i < NumOperands; ++i) + if (N->getOperand(i).getOpcode() != ISD::UNDEF) + break; + + if (i == NumOperands) + // Everything but the first operand is an UNDEF so just return the + // widened first operand. + return GetWidenedVector(N->getOperand(0)); + + if (NumOperands == 2) { + // Replace concat of two operands with a shuffle. + SmallVector<int, 16> MaskOps(WidenNumElts, -1); + for (unsigned i = 0; i < NumInElts; ++i) { + MaskOps[i] = i; + MaskOps[i + NumInElts] = i + WidenNumElts; + } + return DAG.getVectorShuffle(WidenVT, dl, + GetWidenedVector(N->getOperand(0)), + GetWidenedVector(N->getOperand(1)), + &MaskOps[0]); + } + } + } + + // Fall back to use extracts and build vector. 
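+  // Extract every element of every (possibly widened) operand and re-assemble
+  // the result, padding the tail with UNDEF.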
+ EVT EltVT = WidenVT.getVectorElementType(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Idx = 0; + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (InputWidened) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; Idx < WidenNumElts; ++Idx) + Ops[Idx] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { + SDLoc dl(N); + SDValue InOp = N->getOperand(0); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + EVT InWidenVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, WidenNumElts); + + SDValue DTyOp = DAG.getValueType(WidenVT); + SDValue STyOp = DAG.getValueType(InWidenVT); + ISD::CvtCode CvtCode = cast<CvtRndSatSDNode>(N)->getCvtCode(); + + unsigned InVTNumElts = InVT.getVectorNumElements(); + if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { + InOp = GetWidenedVector(InOp); + InVT = InOp.getValueType(); + InVTNumElts = InVT.getVectorNumElements(); + if (InVTNumElts == WidenNumElts) + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (TLI.isTypeLegal(InWidenVT)) { + // Because the result and the input are different vector types, widening + // the result could create a legal type but widening the input might make + // it an illegal type that might lead to repeatedly splitting the input + // and then widening it. To avoid this, we widen the input only if + // it results in a legal type. + if (WidenNumElts % InVTNumElts == 0) { + // Widen the input and call convert on the widened input vector. + unsigned NumConcat = WidenNumElts/InVTNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + Ops[0] = InOp; + SDValue UndefVal = DAG.getUNDEF(InVT); + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = UndefVal; + + InOp = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT, Ops); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + if (InVTNumElts % WidenNumElts == 0) { + // Extract the input and convert the shorten input vector. + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + } + + // Otherwise unroll into some nasty scalar code and rebuild the vector. 
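+  // Convert one element at a time and fill the remaining widened lanes with
+  // UNDEF.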
+ SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = WidenVT.getVectorElementType(); + DTyOp = DAG.getValueType(EltVT); + STyOp = DAG.getValueType(InEltVT); + + unsigned MinElts = std::min(InVTNumElts, WidenNumElts); + unsigned i; + for (i=0; i < MinElts; ++i) { + SDValue ExtVal = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, + SatOp, CvtCode); + } + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { + EVT VT = N->getValueType(0); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + SDValue Idx = N->getOperand(1); + SDLoc dl(N); + + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + + EVT InVT = InOp.getValueType(); + + // Check if we can just return the input vector after widening. + uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); + if (IdxVal == 0 && InVT == WidenVT) + return InOp; + + // Check if we can extract from the vector. + unsigned InNumElts = InVT.getVectorNumElements(); + if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); + + // We could try widening the input to the right length but for now, extract + // the original elements, fill the rest with undefs and build a vector. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = VT.getVectorElementType(); + unsigned NumElts = VT.getVectorNumElements(); + unsigned i; + for (i=0; i < NumElts; ++i) + Ops[i] = + DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(IdxVal + i, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i < WidenNumElts; ++i) + Ops[i] = UndefVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), + InOp.getValueType(), InOp, + N->getOperand(1), N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { + LoadSDNode *LD = cast<LoadSDNode>(N); + ISD::LoadExtType ExtType = LD->getExtensionType(); + + SDValue Result; + SmallVector<SDValue, 16> LdChain; // Chain for the series of load + if (ExtType != ISD::NON_EXTLOAD) + Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); + else + Result = GenWidenVectorLoads(LdChain, LD); + + // If we generate a single load, we can use that for the chain. Otherwise, + // build a factor node to remember the multiple loads are independent and + // chain to that. + SDValue NewChain; + if (LdChain.size() == 1) + NewChain = LdChain[0]; + else + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); + + // Modified the chain - switch anything that used the old chain to use + // the new one. 
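+  // The load's chain result (value #1) is redirected to the merged chain
+  // before the widened value is returned.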
+ ReplaceValueWith(SDValue(N, 1), NewChain); + + return Result; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); + SDValue Mask = N->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue Src0 = GetWidenedVector(N->getSrc0()); + ISD::LoadExtType ExtType = N->getExtensionType(); + SDLoc dl(N); + + if (getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + EVT BoolVT = getSetCCResultType(WidenVT); + + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + + SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, Src0, N->getMemoryVT(), + N->getMemOperand(), ExtType); + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { + + EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue Src0 = GetWidenedVector(N->getValue()); + unsigned NumElts = WideVT.getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen the Index operand + SDValue Index = N->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + SDValue Ops[] = { N->getChain(), Src0, Mask, N->getBasePtr(), Index }; + SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), + N->getMemoryVT(), dl, Ops, + N->getMemOperand()); + + // Legalize the chain result - switch anything that used the old chain to + // use the new one. + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + return Res; +} + +SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), + WidenVT, N->getOperand(0)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue Cond1 = N->getOperand(0); + EVT CondVT = Cond1.getValueType(); + if (CondVT.isVector()) { + EVT CondEltVT = CondVT.getVectorElementType(); + EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), + CondEltVT, WidenNumElts); + if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) + Cond1 = GetWidenedVector(Cond1); + + // If we have to split the condition there is no point in widening the + // select. This would result in an cycle of widening the select -> + // widening the condition operand -> splitting the condition operand -> + // splitting the select -> widening the select. Instead split this select + // further and widen the resulting type. 
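+    // SplitVecOp_VSELECT yields the select at its original (pre-widening)
+    // type; ModifyToType then pads that result out to WidenVT.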
+ if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { + SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); + SDValue Res = ModifyToType(SplitSelect, WidenVT); + return Res; + } + + if (Cond1.getValueType() != CondWidenVT) + Cond1 = ModifyToType(Cond1, CondWidenVT); + } + + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDValue InOp2 = GetWidenedVector(N->getOperand(2)); + assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); + return DAG.getNode(N->getOpcode(), SDLoc(N), + WidenVT, Cond1, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { + SDValue InOp1 = GetWidenedVector(N->getOperand(2)); + SDValue InOp2 = GetWidenedVector(N->getOperand(3)); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), + InOp1.getValueType(), N->getOperand(0), + N->getOperand(1), InOp1, InOp2, N->getOperand(4)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { + assert(N->getValueType(0).isVector() == + N->getOperand(0).getValueType().isVector() && + "Scalar/Vector type mismatch"); + if (N->getValueType(0).isVector()) return WidenVecRes_VSETCC(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, + InOp1, InOp2, N->getOperand(2)); +} + +SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + return DAG.getUNDEF(WidenVT); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + unsigned NumElts = VT.getVectorNumElements(); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Adjust mask based on new input vector length. + SmallVector<int, 16> NewMask; + for (unsigned i = 0; i != NumElts; ++i) { + int Idx = N->getMaskElt(i); + if (Idx < (int)NumElts) + NewMask.push_back(Idx); + else + NewMask.push_back(Idx - NumElts + WidenNumElts); + } + for (unsigned i = NumElts; i != WidenNumElts; ++i) + NewMask.push_back(-1); + return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, &NewMask[0]); +} + +SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { + assert(N->getValueType(0).isVector() && + N->getOperand(0).getValueType().isVector() && + "Operands must be vectors"); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + + SDValue InOp1 = N->getOperand(0); + EVT InVT = InOp1.getValueType(); + assert(InVT.isVector() && "can not widen non-vector type"); + EVT WidenInVT = EVT::getVectorVT(*DAG.getContext(), + InVT.getVectorElementType(), WidenNumElts); + + // The input and output types often differ here, and it could be that while + // we'd prefer to widen the result type, the input operands have been split. + // In this case, we also need to split the result of this node as well. + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + SDValue SplitVSetCC = SplitVecOp_VSETCC(N); + SDValue Res = ModifyToType(SplitVSetCC, WidenVT); + return Res; + } + + InOp1 = GetWidenedVector(InOp1); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + + // Assume that the input and output will be widen appropriately. 
If not, + // we will have to unroll it at some point. + assert(InOp1.getValueType() == WidenInVT && + InOp2.getValueType() == WidenInVT && + "Input not widened to expected type!"); + (void)WidenInVT; + return DAG.getNode(ISD::SETCC, SDLoc(N), + WidenVT, InOp1, InOp2, N->getOperand(2)); +} + + +//===----------------------------------------------------------------------===// +// Widen Vector Operand +//===----------------------------------------------------------------------===// +bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { + DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"); + SDValue Res = SDValue(); + + // See if the target wants to custom widen this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + + switch (N->getOpcode()) { + default: +#ifndef NDEBUG + dbgs() << "WidenVectorOperand op #" << OpNo << ": "; + N->dump(&DAG); + dbgs() << "\n"; +#endif + llvm_unreachable("Do not know how to widen this operator's operand!"); + + case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; + case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; + case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; + case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; + case ISD::STORE: Res = WidenVecOp_STORE(N); break; + case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; + case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break; + + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + case ISD::TRUNCATE: + Res = WidenVecOp_Convert(N); + break; + } + + // If Res is null, the sub-method took care of registering the result. + if (!Res.getNode()) return false; + + // If the result is N, the sub-method updated N in place. Tell the legalizer + // core about this. + if (Res.getNode() == N) + return true; + + + assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && + "Invalid operand expansion"); + + ReplaceValueWith(SDValue(N, 0), Res); + return false; +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
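+  // Scan the simple vector types for a legal type with the same element type
+  // and the same total bit width as the result, then insert or extract a
+  // subvector to move the operand into it.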
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode( + ISD::INSERT_SUBVECTOR, DL, FixedVT, DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + else + InOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on on extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + +SDValue DAGTypeLegalizer::WidenVecOp_FCOPYSIGN(SDNode *N) { + // The result (and first input) is legal, but the second input is illegal. + // We can't do much to fix that, so just unroll and let the extracts off of + // the second input be widened as needed later. + return DAG.UnrollVectorOp(N); +} + +SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { + // Since the result is legal and the input is illegal, it is unlikely + // that we can fix the input to a legal type so unroll the convert + // into some scalar code and create a nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InOp = N->getOperand(0); + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + EVT InVT = InOp.getValueType(); + EVT InEltVT = InVT.getVectorElementType(); + + unsigned Opcode = N->getOpcode(); + SmallVector<SDValue, 16> Ops(NumElts); + for (unsigned i=0; i < NumElts; ++i) + Ops[i] = DAG.getNode( + Opcode, dl, EltVT, + DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, + DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())))); + + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { + EVT VT = N->getValueType(0); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + EVT InWidenVT = InOp.getValueType(); + SDLoc dl(N); + + // Check if we can convert between two legal vector types and extract. + unsigned InWidenSize = InWidenVT.getSizeInBits(); + unsigned Size = VT.getSizeInBits(); + // x86mmx is not an acceptable vector element type, so don't try. 
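+  // When the widened input's bits are a multiple of the scalar result size,
+  // bitcast to a vector of the result type and extract element 0.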
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) { + unsigned NewNumElts = InWidenSize / Size; + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); + if (TLI.isTypeLegal(NewVT)) { + SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); + return DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + } + + return CreateStackStoreLoad(InOp, VT); +} + +SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { + // If the input vector is not legal, it is likely that we will not find a + // legal vector of the same size. Replace the concatenate vector with a + // nasty build vector. + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + unsigned NumElts = VT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(NumElts); + + EVT InVT = N->getOperand(0).getValueType(); + unsigned NumInElts = InVT.getVectorNumElements(); + + unsigned Idx = 0; + unsigned NumOperands = N->getNumOperands(); + for (unsigned i=0; i < NumOperands; ++i) { + SDValue InOp = N->getOperand(i); + if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) + InOp = GetWidenedVector(InOp); + for (unsigned j=0; j < NumInElts; ++j) + Ops[Idx++] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(j, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { + SDValue InOp = GetWidenedVector(N->getOperand(0)); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), + N->getValueType(0), InOp, N->getOperand(1)); +} + +SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { + // We have to widen the value but we want only to store the original + // vector type. 
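+  // For instance, a store of a v3f32 whose value has been widened to v4f32
+  // must still write exactly 96 bits: GenWidenVectorStores() below chops the
+  // widened value into smaller legal pieces (e.g. a 64-bit plus a 32-bit
+  // store on a typical target), chosen by FindMemType(), so the padding lane
+  // of the widened vector never reaches memory.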
+ StoreSDNode *ST = cast<StoreSDNode>(N); + + SmallVector<SDValue, 16> StChain; + if (ST->isTruncatingStore()) + GenWidenVectorTruncStores(StChain, ST); + else + GenWidenVectorStores(StChain, ST); + + if (StChain.size() == 1) + return StChain[0]; + else + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); +} + +SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { + MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); + SDValue Mask = MST->getMask(); + EVT MaskVT = Mask.getValueType(); + SDValue StVal = MST->getValue(); + // Widen the value + SDValue WideVal = GetWidenedVector(StVal); + SDLoc dl(N); + + if (OpNo == 2 || getTypeAction(MaskVT) == TargetLowering::TypeWidenVector) + Mask = GetWidenedVector(Mask); + else { + // The mask should be widened as well + EVT BoolVT = getSetCCResultType(WideVal.getValueType()); + // We can't use ModifyToType() because we should fill the mask with + // zeroes + unsigned WidenNumElts = BoolVT.getVectorNumElements(); + unsigned MaskNumElts = MaskVT.getVectorNumElements(); + + unsigned NumConcat = WidenNumElts / MaskNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue ZeroVal = DAG.getConstant(0, dl, MaskVT); + Ops[0] = Mask; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = ZeroVal; + + Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, BoolVT, Ops); + } + assert(Mask.getValueType().getVectorNumElements() == + WideVal.getValueType().getVectorNumElements() && + "Mask and data vectors should have the same number of elements"); + return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(), + Mask, MST->getMemoryVT(), MST->getMemOperand(), + false); +} + +SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { + assert(OpNo == 1 && "Can widen only data operand of mscatter"); + MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); + SDValue DataOp = MSC->getValue(); + SDValue Mask = MSC->getMask(); + + // Widen the value + SDValue WideVal = GetWidenedVector(DataOp); + EVT WideVT = WideVal.getValueType(); + unsigned NumElts = WideVal.getValueType().getVectorNumElements(); + SDLoc dl(N); + + // The mask should be widened as well + Mask = WidenTargetBoolean(Mask, WideVT, true); + + // Widen index + SDValue Index = MSC->getIndex(); + EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), + Index.getValueType().getScalarType(), + NumElts); + Index = ModifyToType(Index, WideIndexVT); + + SDValue Ops[] = {MSC->getChain(), WideVal, Mask, MSC->getBasePtr(), Index}; + return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), + MSC->getMemoryVT(), dl, Ops, + MSC->getMemOperand()); +} + +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { + SDValue InOp0 = GetWidenedVector(N->getOperand(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(1)); + SDLoc dl(N); + + // WARNING: In this code we widen the compare instruction with garbage. + // This garbage may contain denormal floats which may be slow. Is this a real + // concern ? Should we zero the unused lanes if this is a float compare ? + + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. 
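+  // Illustrative example: comparing two v2f32 operands that were widened to
+  // v4f32 yields a wide mask (say v4i32).  Only the first two lanes are
+  // meaningful, so below we build ResVT from the wide result's element type
+  // and the original element count (v2i32 here), EXTRACT_SUBVECTOR the low
+  // lanes, and let PromoteTargetBoolean() convert that to the node's original
+  // result type.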
+ EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + return PromoteTargetBoolean(CC, N->getValueType(0)); +} + + +//===----------------------------------------------------------------------===// +// Vector Widening Utilities +//===----------------------------------------------------------------------===// + +// Utility function to find the type to chop up a widen vector for load/store +// TLI: Target lowering used to determine legal types. +// Width: Width left need to load/store. +// WidenVT: The widen vector type to load to/store from +// Align: If 0, don't allow use of a wider type +// WidenEx: If Align is not 0, the amount additional we can load/store from. + +static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, + unsigned Width, EVT WidenVT, + unsigned Align = 0, unsigned WidenEx = 0) { + EVT WidenEltVT = WidenVT.getVectorElementType(); + unsigned WidenWidth = WidenVT.getSizeInBits(); + unsigned WidenEltWidth = WidenEltVT.getSizeInBits(); + unsigned AlignInBits = Align*8; + + // If we have one element to load/store, return it. + EVT RetVT = WidenEltVT; + if (Width == WidenEltWidth) + return RetVT; + + // See if there is larger legal integer than the element type to load/store + unsigned VT; + for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE; + VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) { + EVT MemVT((MVT::SimpleValueType) VT); + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; + auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); + if ((Action == TargetLowering::TypeLegal || + Action == TargetLowering::TypePromoteInteger) && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + RetVT = MemVT; + break; + } + } + + // See if there is a larger vector type to load/store that has the same vector + // element type and is evenly divisible with the WidenVT. + for (VT = (unsigned)MVT::LAST_VECTOR_VALUETYPE; + VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { + EVT MemVT = (MVT::SimpleValueType) VT; + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { + if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT) + return MemVT; + } + } + + return RetVT; +} + +// Builds a vector type from scalar loads +// VecTy: Resulting Vector type +// LDOps: Load operators to build a vector type +// [Start,End) the list of loads to use. 
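+// Worked example (illustrative): VecTy = v4i32 with LdOps = {i64, i32} and
+// [Start,End) = [0,2).  The i64 is placed in lane 0 of a v2i64 via
+// SCALAR_TO_VECTOR; the next load has a new type, so the vector is bitcast to
+// v4i32 and the insert index is rescaled to 1*64/32 = 2 before the i32 is
+// inserted into lane 2.  The final BITCAST to VecTy is then a no-op and lane 3
+// is left undefined, which is fine when the caller only needed 96 bits.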
+static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, + SmallVectorImpl<SDValue> &LdOps, + unsigned Start, unsigned End) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(LdOps[Start]); + EVT LdTy = LdOps[Start].getValueType(); + unsigned Width = VecTy.getSizeInBits(); + unsigned NumElts = Width / LdTy.getSizeInBits(); + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts); + + unsigned Idx = 1; + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]); + + for (unsigned i = Start + 1; i != End; ++i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + NumElts = Width / NewLdTy.getSizeInBits(); + NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts); + VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp); + // Readjust position and vector position based on new load type + Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits(); + LdTy = NewLdTy; + } + VecOp = DAG.getNode( + ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], + DAG.getConstant(Idx++, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); +} + +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD) { + // The strategy assumes that we can efficiently load powers of two widths. + // The routines chops the vector into the largest vector loads with the same + // element type or scalar loads and then recombines it to the widen vector + // type. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + unsigned WidenWidth = WidenVT.getSizeInBits(); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + int LdWidth = LdVT.getSizeInBits(); + int WidthDiff = WidenWidth - LdWidth; // Difference + unsigned LdAlign = (isVolatile) ? 0 : Align; // Allow wider loads + + // Find the vector type that can load from. 
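+  // Worked example (illustrative; assumes v4i32 and i64 are legal but no
+  // 128-bit integer is): loading a v3i32 (LdWidth = 96) widened to v4i32
+  // (WidthDiff = 32).  If the load is 16-byte aligned and not volatile
+  // (LdAlign = 16), FindMemType can return v4i32 itself because the extra 32
+  // bits read fall within the alignment padding, so one wide load suffices.
+  // If it is volatile or under-aligned (LdAlign = 0), the first call returns
+  // i64, a follow-up call covers the remaining i32, and the pieces are
+  // reassembled below.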
+ EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + int NewVTWidth = NewVT.getSizeInBits(); + SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); + LdChain.push_back(LdOp.getValue(1)); + + // Check if we can load the element with one instruction + if (LdWidth <= NewVTWidth) { + if (!NewVT.isVector()) { + unsigned NumElts = WidenWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); + return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + if (NewVT == WidenVT) + return LdOp; + + assert(WidenWidth % NewVTWidth == 0); + unsigned NumConcat = WidenWidth / NewVTWidth; + SmallVector<SDValue, 16> ConcatOps(NumConcat); + SDValue UndefVal = DAG.getUNDEF(NewVT); + ConcatOps[0] = LdOp; + for (unsigned i = 1; i != NumConcat; ++i) + ConcatOps[i] = UndefVal; + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps); + } + + // Load vector by using multiple loads from largest vector to scalar + SmallVector<SDValue, 16> LdOps; + LdOps.push_back(LdOp); + + LdWidth -= NewVTWidth; + unsigned Offset = 0; + + while (LdWidth > 0) { + unsigned Increment = NewVTWidth / 8; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, dl, BasePtr.getValueType())); + + SDValue L; + if (LdWidth < NewVTWidth) { + // Our current type we are using is too large, find a better size + NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); + NewVTWidth = NewVT.getSizeInBits(); + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment), + AAInfo); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment), + AAInfo); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + + + LdWidth -= NewVTWidth; + } + + // Build the vector from the loads operations + unsigned End = LdOps.size(); + if (!LdOps[0].getValueType().isVector()) + // All the loads are scalar loads. + return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End); + + // If the load contains vectors, build the vector using concat vector. + // All of the vectors used to loads are power of 2 and the scalars load + // can be combined to make a power of 2 vector. 
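+  // Worked example (illustrative; hypothetical target whose widest legal
+  // integer is i32 but where v2i32 is legal): loading a v3i32 widened to
+  // v4i32 can produce LdOps = {v2i32, i32}.  The trailing i32 is packed into
+  // a v2i32 (lane 1 undefined) by BuildVectorFromScalar(), and the two v2i32
+  // values are then concatenated into the final v4i32; the undefined lane is
+  // harmless because only 96 bits were requested.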
+ SmallVector<SDValue, 16> ConcatOps(End); + int i = End - 1; + int Idx = End; + EVT LdTy = LdOps[i].getValueType(); + // First combine the scalar loads to a vector + if (!LdTy.isVector()) { + for (--i; i >= 0; --i) { + LdTy = LdOps[i].getValueType(); + if (LdTy.isVector()) + break; + } + ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i+1, End); + } + ConcatOps[--Idx] = LdOps[i]; + for (--i; i >= 0; --i) { + EVT NewLdTy = LdOps[i].getValueType(); + if (NewLdTy != LdTy) { + // Create a larger vector + ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + Idx = End - 1; + LdTy = NewLdTy; + } + ConcatOps[--Idx] = LdOps[i]; + } + + if (WidenWidth == LdTy.getSizeInBits()*(End - Idx)) + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, + makeArrayRef(&ConcatOps[Idx], End - Idx)); + + // We need to fill the rest with undefs to build the vector + unsigned NumOps = WidenWidth / LdTy.getSizeInBits(); + SmallVector<SDValue, 16> WidenOps(NumOps); + SDValue UndefVal = DAG.getUNDEF(LdTy); + { + unsigned i = 0; + for (; i != End-Idx; ++i) + WidenOps[i] = ConcatOps[Idx+i]; + for (; i != NumOps; ++i) + WidenOps[i] = UndefVal; + } + return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps); +} + +SDValue +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, + ISD::LoadExtType ExtType) { + // For extension loads, it may not be more efficient to chop up the vector + // and then extended it. Instead, we unroll the load and build a new vector. + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector()); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + unsigned Align = LD->getAlignment(); + bool isVolatile = LD->isVolatile(); + bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); + AAMDNodes AAInfo = LD->getAAInfo(); + + EVT EltVT = WidenVT.getVectorElementType(); + EVT LdEltVT = LdVT.getVectorElementType(); + unsigned NumElts = LdVT.getVectorNumElements(); + + // Load each element and widen + unsigned WidenNumElts = WidenVT.getVectorNumElements(); + SmallVector<SDValue, 16> Ops(WidenNumElts); + unsigned Increment = LdEltVT.getSizeInBits() / 8; + Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, + LD->getPointerInfo(), + LdEltVT, isVolatile, isNonTemporal, isInvariant, + Align, AAInfo); + LdChain.push_back(Ops[0].getValue(1)); + unsigned i = 0, Offset = Increment; + for (i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, + DAG.getConstant(Offset, dl, + BasePtr.getValueType())); + Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, + LD->getPointerInfo().getWithOffset(Offset), LdEltVT, + isVolatile, isNonTemporal, isInvariant, Align, + AAInfo); + LdChain.push_back(Ops[i].getValue(1)); + } + + // Fill the rest with undefs + SDValue UndefVal = DAG.getUNDEF(EltVT); + for (; i != WidenNumElts; ++i) + Ops[i] = UndefVal; + + return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, Ops); +} + + +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, + StoreSDNode *ST) { + // The strategy assumes that we can efficiently store powers of two widths. + // The routines chops the vector into the largest vector stores with the same + // element type or scalar stores. 
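+  // Worked example (illustrative; assumes i64 is legal and nothing wider
+  // covers 96 bits): storing a v3i32 whose value was widened to v4i32.  The
+  // first FindMemType() call returns i64, so the value is bitcast to v2i64
+  // and lane 0 is stored; 32 bits remain, the second call returns i32, the
+  // value is viewed as v4i32 and lane 2 is stored at offset 8.  Exactly 96
+  // bits are written and the padding lane never reaches memory.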
+ SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + SDLoc dl(ST); + + EVT StVT = ST->getMemoryVT(); + unsigned StWidth = StVT.getSizeInBits(); + EVT ValVT = ValOp.getValueType(); + unsigned ValWidth = ValVT.getSizeInBits(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned ValEltWidth = ValEltVT.getSizeInBits(); + assert(StVT.getVectorElementType() == ValEltVT); + + int Idx = 0; // current index to store + unsigned Offset = 0; // offset from base to store + while (StWidth != 0) { + // Find the largest vector type we can store with + EVT NewVT = FindMemType(DAG, TLI, StWidth, ValVT); + unsigned NewVTWidth = NewVT.getSizeInBits(); + unsigned Increment = NewVTWidth / 8; + if (NewVT.isVector()) { + unsigned NumVTElts = NewVT.getVectorNumElements(); + do { + SDValue EOp = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + StWidth -= NewVTWidth; + Offset += Increment; + Idx += NumVTElts; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); + } while (StWidth != 0 && StWidth >= NewVTWidth); + } else { + // Cast the vector to the scalar type we can store + unsigned NumElts = ValWidth / NewVTWidth; + EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); + SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp); + // Readjust index position based on new vector type + Idx = Idx * ValEltWidth / NewVTWidth; + do { + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, + DAG.getConstant(Idx++, dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo().getWithOffset(Offset), + isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + StWidth -= NewVTWidth; + Offset += Increment; + BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, + DAG.getConstant(Increment, dl, + BasePtr.getValueType())); + } while (StWidth != 0 && StWidth >= NewVTWidth); + // Restore index back to be relative to the original widen element type + Idx = Idx * NewVTWidth / ValEltWidth; + } + } +} + +void +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, + StoreSDNode *ST) { + // For extension loads, it may not be more efficient to truncate the vector + // and then store it. Instead, we extract each element and then store it. + SDValue Chain = ST->getChain(); + SDValue BasePtr = ST->getBasePtr(); + unsigned Align = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + bool isNonTemporal = ST->isNonTemporal(); + AAMDNodes AAInfo = ST->getAAInfo(); + SDValue ValOp = GetWidenedVector(ST->getValue()); + SDLoc dl(ST); + + EVT StVT = ST->getMemoryVT(); + EVT ValVT = ValOp.getValueType(); + + // It must be true that we the widen vector type is bigger than where + // we need to store. + assert(StVT.isVector() && ValOp.getValueType().isVector()); + assert(StVT.bitsLT(ValOp.getValueType())); + + // For truncating stores, we can not play the tricks of chopping legal + // vector types and bit cast it to the right type. 
Instead, we unroll + // the store. + EVT StEltVT = StVT.getVectorElementType(); + EVT ValEltVT = ValVT.getVectorElementType(); + unsigned Increment = ValEltVT.getSizeInBits() / 8; + unsigned NumElts = StVT.getVectorNumElements(); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, + ST->getPointerInfo(), StEltVT, + isVolatile, isNonTemporal, Align, + AAInfo)); + unsigned Offset = Increment; + for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { + SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), + BasePtr, + DAG.getConstant(Offset, dl, + BasePtr.getValueType())); + SDValue EOp = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, + ST->getPointerInfo().getWithOffset(Offset), + StEltVT, isVolatile, isNonTemporal, + MinAlign(Align, Offset), AAInfo)); + } +} + +/// Modifies a vector input (widen or narrows) to a vector of NVT. The +/// input vector must have the same element type as NVT. +/// FillWithZeroes specifies that the vector should be widened with zeroes. +SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT, + bool FillWithZeroes) { + // Note that InOp might have been widened so it might already have + // the right width or it might need be narrowed. + EVT InVT = InOp.getValueType(); + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + SDLoc dl(InOp); + + // Check if InOp already has the right width. + if (InVT == NVT) + return InOp; + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + if (WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0) { + unsigned NumConcat = WidenNumElts / InNumElts; + SmallVector<SDValue, 16> Ops(NumConcat); + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, InVT) : + DAG.getUNDEF(InVT); + Ops[0] = InOp; + for (unsigned i = 1; i != NumConcat; ++i) + Ops[i] = FillVal; + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops); + } + + if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, + DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + // Fall back to extract and build. + SmallVector<SDValue, 16> Ops(WidenNumElts); + EVT EltVT = NVT.getVectorElementType(); + unsigned MinNumElts = std::min(WidenNumElts, InNumElts); + unsigned Idx; + for (Idx = 0; Idx < MinNumElts; ++Idx) + Ops[Idx] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, + DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for ( ; Idx < WidenNumElts; ++Idx) + Ops[Idx] = FillVal; + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp new file mode 100644 index 0000000..622e06f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -0,0 +1,639 @@ +//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ResourcePriorityQueue class, which is a +// SchedulingPriorityQueue that prioritizes instructions using DFA state to +// reduce the length of the critical path through the basic block +// on VLIW platforms. +// The scheduler is basically a top-down adaptable list scheduler with DFA +// resource tracking added to the cost function. +// DFA is queried as a state machine to model "packets/bundles" during +// schedule. Currently packets/bundles are discarded at the end of +// scheduling, affecting only order of instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "scheduler" + +static cl::opt<bool> DisableDFASched("disable-dfa-sched", cl::Hidden, + cl::ZeroOrMore, cl::init(false), + cl::desc("Disable use of DFA during scheduling")); + +static cl::opt<signed> RegPressureThreshold( + "dfa-sched-reg-pressure-threshold", cl::Hidden, cl::ZeroOrMore, cl::init(5), + cl::desc("Track reg pressure and switch priority to in-depth")); + +ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) + : Picker(this), InstrItins(IS->MF->getSubtarget().getInstrItineraryData()) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + TRI = STI.getRegisterInfo(); + TLI = IS->TLI; + TII = STI.getInstrInfo(); + ResourcesModel.reset(TII->CreateTargetScheduleState(STI)); + // This hard requirement could be relaxed, but for now + // do not let it proceed. + assert(ResourcesModel && "Unimplemented CreateTargetScheduleState."); + + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); + I != E; ++I) + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, *IS->MF); + + ParallelLiveRanges = 0; + HorizontalVerticalBalance = 0; +} + +unsigned +ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *PredSU = I->getSUnit(); + const SDNode *ScegN = PredSU->getNode(); + + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. 
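+    // Illustrative example: the checks below add one to NumberDeps when this
+    // predecessor produces a value whose type maps to register class RCId.
+    // An add fed by two such i32-producing predecessors therefore ends up
+    // with NumberDeps == 2, which is the per-class "kill" estimate consumed
+    // by rawRegPressureDelta().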
+ switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: NumberDeps++; break; + case ISD::CopyToReg: break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + MVT VT = ScegN->getSimpleValueType(i); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, + unsigned RCId) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + + SUnit *SuccSU = I->getSUnit(); + const SDNode *ScegN = SuccSU->getNode(); + if (!ScegN) + continue; + + // If value is passed to CopyToReg, it is probably + // live outside BB. + switch (ScegN->getOpcode()) { + default: break; + case ISD::TokenFactor: break; + case ISD::CopyFromReg: break; + case ISD::CopyToReg: NumberDeps++; break; + case ISD::INLINEASM: break; + } + if (!ScegN->isMachineOpcode()) + continue; + + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (TLI->isTypeLegal(VT) + && (TLI->getRegClassFor(VT)->getID() == RCId)) { + NumberDeps++; + break; + } + } + } + return NumberDeps; +} + +static unsigned numberCtrlDepsInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +static unsigned numberCtrlPredInSU(SUnit *SU) { + unsigned NumberDeps = 0; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (I->isCtrl()) + NumberDeps++; + + return NumberDeps; +} + +/// +/// Initialize nodes. +/// +void ResourcePriorityQueue::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + NumNodesSolelyBlocking.resize(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + initNumRegDefsLeft(SU); + SU->NodeQueueId = 0; + } +} + +/// This heuristic is used if DFA scheduling is not desired +/// for some VLIW platform. +bool resource_sort::operator()(const SUnit *LHS, const SUnit *RHS) const { + // The isScheduleHigh flag allows nodes with wraparound dependencies that + // cannot easily be modeled as edges with latencies to be scheduled as + // soon as possible in a top-down schedule. + if (LHS->isScheduleHigh && !RHS->isScheduleHigh) + return false; + + if (!LHS->isScheduleHigh && RHS->isScheduleHigh) + return true; + + unsigned LHSNum = LHS->NodeNum; + unsigned RHSNum = RHS->NodeNum; + + // The most important heuristic is scheduling the critical path. + unsigned LHSLatency = PQ->getLatency(LHSNum); + unsigned RHSLatency = PQ->getLatency(RHSNum); + if (LHSLatency < RHSLatency) return true; + if (LHSLatency > RHSLatency) return false; + + // After that, if two nodes have identical latencies, look to see if one will + // unblock more other nodes than the other. + unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum); + unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum); + if (LHSBlocked < RHSBlocked) return true; + if (LHSBlocked > RHSBlocked) return false; + + // Finally, just to provide a stable ordering, use the node number as a + // deciding factor. 
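+  // Worked example: for two ready nodes with latencies 3 and 7 the checks
+  // above already prefer the latency-7 node (critical path first), and a
+  // latency tie is broken by the solely-blocked count.  Only when both tie
+  // does the NodeNum comparison below decide, which keeps the ordering
+  // deterministic across runs.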
+ return LHSNum < RHSNum; +} + + +/// getSingleUnscheduledPred - If there is exactly one unscheduled predecessor +/// of SU, return it, otherwise return null. +SUnit *ResourcePriorityQueue::getSingleUnscheduledPred(SUnit *SU) { + SUnit *OnlyAvailablePred = nullptr; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + SUnit &Pred = *I->getSUnit(); + if (!Pred.isScheduled) { + // We found an available, but not scheduled, predecessor. If it's the + // only one we have found, keep track of it... otherwise give up. + if (OnlyAvailablePred && OnlyAvailablePred != &Pred) + return nullptr; + OnlyAvailablePred = &Pred; + } + } + return OnlyAvailablePred; +} + +void ResourcePriorityQueue::push(SUnit *SU) { + // Look at all of the successors of this node. Count the number of nodes that + // this node is the sole unscheduled node for. + unsigned NumNodesBlocking = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) + if (getSingleUnscheduledPred(I->getSUnit()) == SU) + ++NumNodesBlocking; + + NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking; + Queue.push_back(SU); +} + +/// Check if scheduling of this SU is possible +/// in the current packet. +bool ResourcePriorityQueue::isResourceAvailable(SUnit *SU) { + if (!SU || !SU->getNode()) + return false; + + // If this is a compound instruction, + // it is likely to be a call. Do not delay it. + if (SU->getNode()->getGluedNode()) + return true; + + // First see if the pipeline could receive this instruction + // in the current cycle. + if (SU->getNode()->isMachineOpcode()) + switch (SU->getNode()->getMachineOpcode()) { + default: + if (!ResourcesModel->canReserveResources(&TII->get( + SU->getNode()->getMachineOpcode()))) + return false; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + + // Now see if there are no other dependencies + // to instructions already in the packet. + for (unsigned i = 0, e = Packet.size(); i != e; ++i) + for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(), + E = Packet[i]->Succs.end(); I != E; ++I) { + // Since we do not add pseudos to packets, might as well + // ignore order deps. + if (I->isCtrl()) + continue; + + if (I->getSUnit() == SU) + return false; + } + + return true; +} + +/// Keep track of available resources. +void ResourcePriorityQueue::reserveResources(SUnit *SU) { + // If this SU does not fit in the packet + // start a new one. + if (!isResourceAvailable(SU) || SU->getNode()->getGluedNode()) { + ResourcesModel->clearResources(); + Packet.clear(); + } + + if (SU->getNode() && SU->getNode()->isMachineOpcode()) { + switch (SU->getNode()->getMachineOpcode()) { + default: + ResourcesModel->reserveResources(&TII->get( + SU->getNode()->getMachineOpcode())); + break; + case TargetOpcode::EXTRACT_SUBREG: + case TargetOpcode::INSERT_SUBREG: + case TargetOpcode::SUBREG_TO_REG: + case TargetOpcode::REG_SEQUENCE: + case TargetOpcode::IMPLICIT_DEF: + break; + } + Packet.push_back(SU); + } + // Forcefully end packet for PseudoOps. + else { + ResourcesModel->clearResources(); + Packet.clear(); + } + + // If packet is now full, reset the state so in the next cycle + // we start fresh. 
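+  // For example, with SchedModel.IssueWidth == 4 the packet is cleared once
+  // the fourth instruction has been reserved, so the next candidate is
+  // modeled as starting a fresh cycle.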
+ if (Packet.size() >= InstrItins->SchedModel.IssueWidth) { + ResourcesModel->clearResources(); + Packet.clear(); + } +} + +signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + // Gen estimate. + for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { + MVT VT = SU->getNode()->getSimpleValueType(i); + if (TLI->isTypeLegal(VT) + && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance += numberRCValSuccInSU(SU, RCId); + } + // Kill estimate. + for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { + const SDValue &Op = SU->getNode()->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (isa<ConstantSDNode>(Op.getNode())) + continue; + + if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) + && TLI->getRegClassFor(VT)->getID() == RCId) + RegBalance -= numberRCValPredInSU(SU, RCId); + } + return RegBalance; +} + +/// Estimates change in reg pressure from this SU. +/// It is achieved by trivial tracking of defined +/// and used vregs in dependent instructions. +/// The RawPressure flag makes this function to ignore +/// existing reg file sizes, and report raw def/use +/// balance. +signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { + signed RegBalance = 0; + + if (!SU || !SU->getNode() || !SU->getNode()->isMachineOpcode()) + return RegBalance; + + if (RawPressure) { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + else { + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + if ((RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) > 0) && + (RegPressure[RC->getID()] + + rawRegPressureDelta(SU, RC->getID()) >= RegLimit[RC->getID()])) + RegBalance += rawRegPressureDelta(SU, RC->getID()); + } + } + + return RegBalance; +} + +// Constants used to denote relative importance of +// heuristic components for cost computation. +static const unsigned PriorityOne = 200; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; +static const unsigned ScaleOne = 20; +static const unsigned ScaleTwo = 10; +static const unsigned ScaleThree = 5; +static const unsigned FactorOne = 2; + +/// Returns single number reflecting benefit of scheduling SU +/// in the current cycle. +signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { + // Initial trivial priority. + signed ResCount = 1; + + // Do not waste time on a node that is already scheduled. + if (SU->isScheduled) + return ResCount; + + // Forced priority is high. + if (SU->isScheduleHigh) + ResCount += PriorityOne; + + // Adaptable scheduling + // A small, but very parallel + // region, where reg pressure is an issue. + if (HorizontalVerticalBalance > RegPressureThreshold) { + // Critical path first + ResCount += (SU->getHeight() * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + // Consider change to reg pressure from scheduling + // this SU. 
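+    // Illustrative numbers: an unscheduled SU at height 6 with resources
+    // available and regPressureDelta(SU, true) == 2 scores
+    //   ((1 + 6*ScaleTwo) << FactorOne) - 2*ScaleOne = (61 << 2) - 40 = 204,
+    // so a larger positive pressure delta directly lowers the score.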
+ ResCount -= (regPressureDelta(SU,true) * ScaleOne); + } + // Default heuristic, greeady and + // critical path driven. + else { + // Critical path first. + ResCount += (SU->getHeight() * ScaleTwo); + // Now see how many instructions is blocked by this SU. + ResCount += (NumNodesSolelyBlocking[SU->NodeNum] * ScaleTwo); + // If resources are available for it, multiply the + // chance of scheduling. + if (isResourceAvailable(SU)) + ResCount <<= FactorOne; + + ResCount -= (regPressureDelta(SU) * ScaleTwo); + } + + // These are platform-specific things. + // Will need to go into the back end + // and accessed from here via a hook. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + if (TID.isCall()) + ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); + } + else + switch (N->getOpcode()) { + default: break; + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + ResCount += PriorityFour; + break; + + case ISD::INLINEASM: + ResCount += PriorityThree; + break; + } + } + return ResCount; +} + + +/// Main resource tracking point. +void ResourcePriorityQueue::scheduledNode(SUnit *SU) { + // Use NULL entry as an event marker to reset + // the DFA state. + if (!SU) { + ResourcesModel->clearResources(); + Packet.clear(); + return; + } + + const SDNode *ScegN = SU->getNode(); + // Update reg pressure tracking. + // First update current node. + if (ScegN->isMachineOpcode()) { + // Estimate generated regs. + for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { + MVT VT = ScegN->getSimpleValueType(i); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) + RegPressure[RC->getID()] += numberRCValSuccInSU(SU, RC->getID()); + } + } + // Estimate killed regs. + for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { + const SDValue &Op = ScegN->getOperand(i); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + + if (TLI->isTypeLegal(VT)) { + const TargetRegisterClass *RC = TLI->getRegClassFor(VT); + if (RC) { + if (RegPressure[RC->getID()] > + (numberRCValPredInSU(SU, RC->getID()))) + RegPressure[RC->getID()] -= numberRCValPredInSU(SU, RC->getID()); + else RegPressure[RC->getID()] = 0; + } + } + } + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl() || (I->getSUnit()->NumRegDefsLeft == 0)) + continue; + --I->getSUnit()->NumRegDefsLeft; + } + } + + // Reserve resources for this SU. + reserveResources(SU); + + // Adjust number of parallel live ranges. + // Heuristic is simple - node with no data successors reduces + // number of live ranges. All others, increase it. + unsigned NumberNonControlDeps = 0; + + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + adjustPriorityOfUnscheduledPreds(I->getSUnit()); + if (!I->isCtrl()) + NumberNonControlDeps++; + } + + if (!NumberNonControlDeps) { + if (ParallelLiveRanges >= SU->NumPreds) + ParallelLiveRanges -= SU->NumPreds; + else + ParallelLiveRanges = 0; + + } + else + ParallelLiveRanges += SU->NumRegDefsLeft; + + // Track parallel live chains. 
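+  // For example, an SU with three data successors and one data predecessor
+  // raises the balance by 2; once the balance exceeds RegPressureThreshold,
+  // SUSchedulingCost() switches to its register-pressure-aware branch.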
+ HorizontalVerticalBalance += (SU->Succs.size() - numberCtrlDepsInSU(SU)); + HorizontalVerticalBalance -= (SU->Preds.size() - numberCtrlPredInSU(SU)); +} + +void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) { + unsigned NodeNumDefs = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) { + const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); + // No register need be allocated for this. + if (N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) { + NodeNumDefs = 0; + break; + } + NodeNumDefs = std::min(N->getNumValues(), TID.getNumDefs()); + } + else + switch(N->getOpcode()) { + default: break; + case ISD::CopyFromReg: + NodeNumDefs++; + break; + case ISD::INLINEASM: + NodeNumDefs++; + break; + } + + SU->NumRegDefsLeft = NodeNumDefs; +} + +/// adjustPriorityOfUnscheduledPreds - One of the predecessors of SU was just +/// scheduled. If SU is not itself available, then there is at least one +/// predecessor node that has not been scheduled yet. If SU has exactly ONE +/// unscheduled predecessor, we want to increase its priority: it getting +/// scheduled will make this node available, so it is better than some other +/// node of the same priority that will not make a node available. +void ResourcePriorityQueue::adjustPriorityOfUnscheduledPreds(SUnit *SU) { + if (SU->isAvailable) return; // All preds scheduled. + + SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU); + if (!OnlyAvailablePred || !OnlyAvailablePred->isAvailable) + return; + + // Okay, we found a single predecessor that is available, but not scheduled. + // Since it is available, it must be in the priority queue. First remove it. + remove(OnlyAvailablePred); + + // Reinsert the node into the priority queue, which recomputes its + // NumNodesSolelyBlocking value. + push(OnlyAvailablePred); +} + + +/// Main access point - returns next instructions +/// to be placed in scheduling sequence. +SUnit *ResourcePriorityQueue::pop() { + if (empty()) + return nullptr; + + std::vector<SUnit *>::iterator Best = Queue.begin(); + if (!DisableDFASched) { + signed BestCost = SUSchedulingCost(*Best); + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), + E = Queue.end(); I != E; ++I) { + + if (SUSchedulingCost(*I) > BestCost) { + BestCost = SUSchedulingCost(*I); + Best = I; + } + } + } + // Use default TD scheduling mechanism. + else { + for (std::vector<SUnit *>::iterator I = std::next(Queue.begin()), + E = Queue.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + } + + SUnit *V = *Best; + if (Best != std::prev(Queue.end())) + std::swap(*Best, Queue.back()); + + Queue.pop_back(); + + return V; +} + + +void ResourcePriorityQueue::remove(SUnit *SU) { + assert(!Queue.empty() && "Queue is empty!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU); + if (I != std::prev(Queue.end())) + std::swap(*I, Queue.back()); + + Queue.pop_back(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h new file mode 100644 index 0000000..c27f8de --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -0,0 +1,124 @@ +//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file declares the SDDbgValue class. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SDNODEDBGVALUE_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/Support/DataTypes.h" + +namespace llvm { + +class MDNode; +class SDNode; +class Value; + +/// SDDbgValue - Holds the information from a dbg_value node through SDISel. +/// We do not use SDValue here to avoid including its header. + +class SDDbgValue { +public: + enum DbgValueKind { + SDNODE = 0, // value is the result of an expression + CONST = 1, // value is a constant + FRAMEIX = 2 // value is contents of a stack location + }; +private: + union { + struct { + SDNode *Node; // valid for expressions + unsigned ResNo; // valid for expressions + } s; + const Value *Const; // valid for constants + unsigned FrameIx; // valid for stack objects + } u; + MDNode *Var; + MDNode *Expr; + uint64_t Offset; + DebugLoc DL; + unsigned Order; + enum DbgValueKind kind; + bool IsIndirect; + bool Invalid = false; + +public: + // Constructor for non-constants. + SDDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, unsigned R, bool indir, + uint64_t off, DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(indir) { + kind = SDNODE; + u.s.Node = N; + u.s.ResNo = R; + } + + // Constructor for constants. + SDDbgValue(MDNode *Var, MDNode *Expr, const Value *C, uint64_t off, + DebugLoc dl, unsigned O) + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + kind = CONST; + u.Const = C; + } + + // Constructor for frame indices. + SDDbgValue(MDNode *Var, MDNode *Expr, unsigned FI, uint64_t off, DebugLoc dl, + unsigned O) + : Var(Var), Expr(Expr), Offset(off), DL(dl), Order(O), IsIndirect(false) { + kind = FRAMEIX; + u.FrameIx = FI; + } + + // Returns the kind. + DbgValueKind getKind() const { return kind; } + + // Returns the MDNode pointer for the variable. + MDNode *getVariable() const { return Var; } + + // Returns the MDNode pointer for the expression. + MDNode *getExpression() const { return Expr; } + + // Returns the SDNode* for a register ref + SDNode *getSDNode() const { assert (kind==SDNODE); return u.s.Node; } + + // Returns the ResNo for a register ref + unsigned getResNo() const { assert (kind==SDNODE); return u.s.ResNo; } + + // Returns the Value* for a constant + const Value *getConst() const { assert (kind==CONST); return u.Const; } + + // Returns the FrameIx for a stack object + unsigned getFrameIx() const { assert (kind==FRAMEIX); return u.FrameIx; } + + // Returns whether this is an indirect value. + bool isIndirect() const { return IsIndirect; } + + // Returns the offset. + uint64_t getOffset() const { return Offset; } + + // Returns the DebugLoc. + DebugLoc getDebugLoc() const { return DL; } + + // Returns the SDNodeOrder. This is the order of the preceding node in the + // input. + unsigned getOrder() const { return Order; } + + // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated" + // property. A SDDbgValue is invalid if the SDNode that produces the value is + // deleted. 
+ void setIsInvalidated() { Invalid = true; } + bool isInvalidated() const { return Invalid; } +}; + +} // end llvm namespace + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp new file mode 100644 index 0000000..62e7733 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -0,0 +1,805 @@ +//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a fast scheduler. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "InstrEmitter.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical copies"); + +static RegisterScheduler + fastDAGScheduler("fast", "Fast suboptimal list scheduling", + createFastDAGScheduler); +static RegisterScheduler + linearizeDAGScheduler("linearize", "Linearize DAG, no scheduling", + createDAGLinearizer); + + +namespace { + /// FastPriorityQueue - A degenerate priority queue that considers + /// all nodes to have the same priority. + /// + struct FastPriorityQueue { + SmallVector<SUnit *, 16> Queue; + + bool empty() const { return Queue.empty(); } + + void push(SUnit *U) { + Queue.push_back(U); + } + + SUnit *pop() { + if (empty()) return nullptr; + SUnit *V = Queue.back(); + Queue.pop_back(); + return V; + } + }; + +//===----------------------------------------------------------------------===// +/// ScheduleDAGFast - The actual "fast" list scheduler implementation. +/// +class ScheduleDAGFast : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + FastPriorityQueue AvailableQueue; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::vector<SUnit*> LiveRegDefs; + std::vector<unsigned> LiveRegCycles; + +public: + ScheduleDAGFast(MachineFunction &mf) + : ScheduleDAGSDNodes(mf) {} + + void Schedule() override; + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + void AddPred(SUnit *SU, const SDep &D) { + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. 
+ void RemovePred(SUnit *SU, const SDep &D) { + SU->removePred(D); + } + +private: + void ReleasePred(SUnit *SU, SDep *PredEdge); + void ReleasePredecessors(SUnit *SU, unsigned CurCycle); + void ScheduleNodeBottomUp(SUnit*, unsigned); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); + void ListScheduleBottomUp(); + + /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. + bool forceUnitLatencies() const override { return true; } +}; +} // end anonymous namespace + + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGFast::Schedule() { + DEBUG(dbgs() << "********** List Scheduling **********\n"); + + NumLiveRegs = 0; + LiveRegDefs.resize(TRI->getNumRegs(), nullptr); + LiveRegCycles.resize(TRI->getNumRegs(), 0); + + // Build the scheduling graph. + BuildSchedGraph(nullptr); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGFast::ReleasePred(SUnit *SU, SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + --PredSU->NumSuccsLeft; + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + AvailableQueue.push(PredSU); + } +} + +void ScheduleDAGFast::ReleasePredecessors(SUnit *SU, unsigned CurCycle) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + if (!LiveRegDefs[I->getReg()]) { + ++NumLiveRegs; + LiveRegDefs[I->getReg()] = I->getSUnit(); + LiveRegCycles[I->getReg()] = CurCycle; + } + } + } +} + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + assert(CurCycle >= SU->getHeight() && "Node scheduled below its height!"); + SU->setHeightToAtLeast(CurCycle); + Sequence.push_back(SU); + + ReleasePredecessors(SU, CurCycle); + + // Release all the implicit physical register defs that are live. 
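+  // Illustration: when a successor was scheduled earlier (we go bottom-up),
+  // ReleasePredecessors() recorded this SU as the live definition of the
+  // physical register that successor reads (e.g. an implicit flags def).
+  // Now that the defining node itself is scheduled, the entry is cleared
+  // (once the recorded cycle matches) and NumLiveRegs drops, so other nodes
+  // may clobber that register again.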
+ for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + assert(LiveRegDefs[I->getReg()] == SU && + "Physical register dependency violated?"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegCycles[I->getReg()] = 0; + } + } + } + + SU->isScheduled = true; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. +SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { + if (SU->getNode()->getGluedNode()) + return nullptr; + + SDNode *N = SU->getNode(); + if (!N) + return nullptr; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getSimpleValueType(i); + if (VT == MVT::Glue) + return nullptr; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (const SDValue &Op : N->op_values()) { + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Glue) + return nullptr; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return nullptr; + + DEBUG(dbgs() << "Unfolding SU # " << SU->NodeNum << "\n"); + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + SUnit *NewSU = newSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. 
+ bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = newSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + } + + SDep ChainPred; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPred = *I; + else if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->isOperandOf(LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + if (ChainPred.getSUnit()) { + RemovePred(SU, ChainPred); + if (isNewLoad) + AddPred(LoadSU, ChainPred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) { + AddPred(LoadSU, Pred); + } + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + if (isNewLoad) { + SDep D(LoadSU, SDep::Barrier); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + } + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << "Duplicating SU # " << SU->NodeNum << "\n"); + NewSU = Clone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { + SUnit *CopyFromSU = newSUnit(static_cast<SDNode *>(nullptr)); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = newSUnit(static_cast<SDNode *>(nullptr)); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. 
+ SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + RemovePred(DelDeps[i].first, DelDeps[i].second); + } + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); + + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. + NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + } + return N->getSimpleValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, + std::vector<SUnit*> &LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVectorImpl<unsigned> &LRegs, + const TargetRegisterInfo *TRI) { + bool Added = false; + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { + if (LiveRegDefs[*AI] && LiveRegDefs[*AI] != SU) { + if (RegAdded.insert(*AI).second) { + LRegs.push_back(*AI); + Added = true; + } + } + } + return Added; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. +bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, + SmallVectorImpl<unsigned> &LRegs){ + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep()) { + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs, + RegAdded, LRegs, TRI); + } + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. 
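+        // The flag word encodes both the operand kind and the number of
+        // register operands that follow it; only defs, early-clobber defs,
+        // and clobbers can interfere with currently live registers.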
+ if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + if (!Node->isMachineOpcode()) + continue; + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) + continue; + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); + } + } + return !LRegs.empty(); +} + + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGFast::ListScheduleBottomUp() { + unsigned CurCycle = 0; + + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU, CurCycle); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue.push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + SmallVector<SUnit*, 4> NotReady; + DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue.empty()) { + bool Delayed = false; + LRegsMap.clear(); + SUnit *CurSU = AvailableQueue.pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + Delayed = true; + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + NotReady.push_back(CurSU); + CurSU = AvailableQueue.pop(); + } + + // All candidates are delayed due to live physical reg dependencies. + // Try code duplication or inserting cross class copies + // to resolve it. + if (Delayed && !CurSU) { + if (!CurSU) { + // Try duplicating the nodes that produces these + // "expensive to copy" values to break the dependency. In case even + // that doesn't work, insert cross class copies. + SUnit *TrySU = NotReady[0]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is the same as RC, then it must be + // possible copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's + // possible to copy the value but it require cross register class copies + // and it is expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. + SUnit *NewDef = nullptr; + if (DestRC != RC) { + NewDef = CopyAndMoveSuccessors(LRDef); + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical " + "register dependency!"); + } + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. 
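+          // InsertCopiesAndMoveSuccs emits a copy out of the physical register
+          // into DestRC and a copy back again; the final copy becomes the new
+          // live definition recorded below.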
+ SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << "Adding an edge from SU # " << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << "Adding an edge from SU # " << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + + if (!CurSU) { + llvm_unreachable("Unable to resolve live physical register dependencies!"); + } + } + + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = 0, e = NotReady.size(); i != e; ++i) { + NotReady[i]->isPending = false; + // May no longer be available due to backtracking. + if (NotReady[i]->isAvailable) + AvailableQueue.push(NotReady[i]); + } + NotReady.clear(); + + if (CurSU) + ScheduleNodeBottomUp(CurSU, CurCycle); + ++CurCycle; + } + + // Reverse the order since it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/true); +#endif +} + + +namespace { +//===----------------------------------------------------------------------===// +// ScheduleDAGLinearize - No scheduling scheduler, it simply linearize the +// DAG in topological order. +// IMPORTANT: this may not work for targets with phyreg dependency. +// +class ScheduleDAGLinearize : public ScheduleDAGSDNodes { +public: + ScheduleDAGLinearize(MachineFunction &mf) : ScheduleDAGSDNodes(mf) {} + + void Schedule() override; + + MachineBasicBlock * + EmitSchedule(MachineBasicBlock::iterator &InsertPos) override; + +private: + std::vector<SDNode*> Sequence; + DenseMap<SDNode*, SDNode*> GluedMap; // Cache glue to its user + + void ScheduleNode(SDNode *N); +}; +} // end anonymous namespace + +void ScheduleDAGLinearize::ScheduleNode(SDNode *N) { + if (N->getNodeId() != 0) + llvm_unreachable(nullptr); + + if (!N->isMachineOpcode() && + (N->getOpcode() == ISD::EntryToken || isPassiveNode(N))) + // These nodes do not need to be translated into MIs. + return; + + DEBUG(dbgs() << "\n*** Scheduling: "); + DEBUG(N->dump(DAG)); + Sequence.push_back(N); + + unsigned NumOps = N->getNumOperands(); + if (unsigned NumLeft = NumOps) { + SDNode *GluedOpN = nullptr; + do { + const SDValue &Op = N->getOperand(NumLeft-1); + SDNode *OpN = Op.getNode(); + + if (NumLeft == NumOps && Op.getValueType() == MVT::Glue) { + // Schedule glue operand right above N. + GluedOpN = OpN; + assert(OpN->getNodeId() != 0 && "Glue operand not ready?"); + OpN->setNodeId(0); + ScheduleNode(OpN); + continue; + } + + if (OpN == GluedOpN) + // Glue operand is already scheduled. + continue; + + DenseMap<SDNode*, SDNode*>::iterator DI = GluedMap.find(OpN); + if (DI != GluedMap.end() && DI->second != N) + // Users of glues are counted against the glued users. + OpN = DI->second; + + unsigned Degree = OpN->getNodeId(); + assert(Degree > 0 && "Predecessor over-released!"); + OpN->setNodeId(--Degree); + if (Degree == 0) + ScheduleNode(OpN); + } while (--NumLeft); + } +} + +/// findGluedUser - Find the representative use of a glue value by walking +/// the use chain. 
+static SDNode *findGluedUser(SDNode *N) { + while (SDNode *Glued = N->getGluedUser()) + N = Glued; + return N; +} + +void ScheduleDAGLinearize::Schedule() { + DEBUG(dbgs() << "********** DAG Linearization **********\n"); + + SmallVector<SDNode*, 8> Glues; + unsigned DAGSize = 0; + for (SDNode &Node : DAG->allnodes()) { + SDNode *N = &Node; + + // Use node id to record degree. + unsigned Degree = N->use_size(); + N->setNodeId(Degree); + unsigned NumVals = N->getNumValues(); + if (NumVals && N->getValueType(NumVals-1) == MVT::Glue && + N->hasAnyUseOfValue(NumVals-1)) { + SDNode *User = findGluedUser(N); + if (User) { + Glues.push_back(N); + GluedMap.insert(std::make_pair(N, User)); + } + } + + if (N->isMachineOpcode() || + (N->getOpcode() != ISD::EntryToken && !isPassiveNode(N))) + ++DAGSize; + } + + for (unsigned i = 0, e = Glues.size(); i != e; ++i) { + SDNode *Glue = Glues[i]; + SDNode *GUser = GluedMap[Glue]; + unsigned Degree = Glue->getNodeId(); + unsigned UDegree = GUser->getNodeId(); + + // Glue user must be scheduled together with the glue operand. So other + // users of the glue operand must be treated as its users. + SDNode *ImmGUser = Glue->getGluedUser(); + for (SDNode::use_iterator ui = Glue->use_begin(), ue = Glue->use_end(); + ui != ue; ++ui) + if (*ui == ImmGUser) + --Degree; + GUser->setNodeId(UDegree + Degree); + Glue->setNodeId(1); + } + + Sequence.reserve(DAGSize); + ScheduleNode(DAG->getRoot().getNode()); +} + +MachineBasicBlock* +ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + + DEBUG({ + dbgs() << "\n*** Final schedule ***\n"; + }); + + // FIXME: Handle dbg_values. + unsigned NumNodes = Sequence.size(); + for (unsigned i = 0; i != NumNodes; ++i) { + SDNode *N = Sequence[NumNodes-i-1]; + DEBUG(N->dump(DAG)); + Emitter.EmitNode(N, false, false, VRBaseMap); + } + + DEBUG(dbgs() << '\n'); + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createFastDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGFast(*IS->MF); +} + +llvm::ScheduleDAGSDNodes * +llvm::createDAGLinearizer(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGLinearize(*IS->MF); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp new file mode 100644 index 0000000..91024e6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -0,0 +1,3050 @@ +//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements bottom-up and top-down register pressure reduction list +// schedulers, using standard algorithms. The basic approach uses a priority +// queue of available nodes to schedule. One at a time, nodes are taken from +// the priority queue (thus in priority order), checked for legality to +// schedule, and emitted if legal. 
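+// The bottom-up schedulers below additionally track live physical register
+// definitions and break the resulting interferences by backtracking, by
+// cloning the defining node, or by inserting cross-register-class copies.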
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <climits> +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); +STATISTIC(NumUnfolds, "Number of nodes unfolded"); +STATISTIC(NumDups, "Number of duplicated nodes"); +STATISTIC(NumPRCopies, "Number of physical register copies"); + +static RegisterScheduler + burrListDAGScheduler("list-burr", + "Bottom-up register reduction list scheduling", + createBURRListDAGScheduler); +static RegisterScheduler + sourceListDAGScheduler("source", + "Similar to list-burr but schedules in source " + "order when possible", + createSourceListDAGScheduler); + +static RegisterScheduler + hybridListDAGScheduler("list-hybrid", + "Bottom-up register pressure aware list scheduling " + "which tries to balance latency and register pressure", + createHybridListDAGScheduler); + +static RegisterScheduler + ILPListDAGScheduler("list-ilp", + "Bottom-up register pressure aware list scheduling " + "which tries to balance ILP and register pressure", + createILPListDAGScheduler); + +static cl::opt<bool> DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(false), + cl::desc("Disable cycle-level precision during preRA scheduling")); + +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. 
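+// These are ordinary cl::opt flags, so they can typically be toggled from the
+// llc command line, for example with something like:
+//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure <input.ll>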
+static cl::opt<bool> DisableSchedRegPressure( + "disable-sched-reg-pressure", cl::Hidden, cl::init(false), + cl::desc("Disable regpressure priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedLiveUses( + "disable-sched-live-uses", cl::Hidden, cl::init(true), + cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedVRegCycle( + "disable-sched-vrcycle", cl::Hidden, cl::init(false), + cl::desc("Disable virtual register cycle interference checks")); +static cl::opt<bool> DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); +static cl::opt<bool> DisableSchedStalls( + "disable-sched-stalls", cl::Hidden, cl::init(true), + cl::desc("Disable no-stall priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedCriticalPath( + "disable-sched-critical-path", cl::Hidden, cl::init(false), + cl::desc("Disable critical path priority in sched=list-ilp")); +static cl::opt<bool> DisableSchedHeight( + "disable-sched-height", cl::Hidden, cl::init(false), + cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); + +static cl::opt<int> MaxReorderWindow( + "max-sched-reorder", cl::Hidden, cl::init(6), + cl::desc("Number of instructions to allow ahead of the critical path " + "in sched=list-ilp")); + +static cl::opt<unsigned> AvgIPC( + "sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle whan no target itinerary exists.")); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGRRList - The actual register reduction list scheduler +/// implementation. This supports both top-down and bottom-up scheduling. +/// +class ScheduleDAGRRList : public ScheduleDAGSDNodes { +private: + /// NeedLatency - True if the scheduler will make use of latency information. + /// + bool NeedLatency; + + /// AvailableQueue - The priority queue to use for the available SUnits. + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands becomes available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// CurCycle - The current scheduler state corresponds to this cycle. + unsigned CurCycle; + + /// MinAvailableCycle - Cycle of the soonest available instruction. + unsigned MinAvailableCycle; + + /// IssueCount - Count instructions issued in this cycle + /// Currently valid only for bottom-up scheduling. + unsigned IssueCount; + + /// LiveRegDefs - A set of physical registers and their definition + /// that are "live". These nodes must be scheduled before any other nodes that + /// modifies the registers can be scheduled. + unsigned NumLiveRegs; + std::unique_ptr<SUnit*[]> LiveRegDefs; + std::unique_ptr<SUnit*[]> LiveRegGens; + + // Collect interferences between physical register use/defs. + // Each interference is an SUnit and set of physical registers. 
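+  // A node is parked here (and in LRegsMap) when DelayForLiveRegsBottomUp
+  // reports interfering registers; releaseInterferences() re-queues it once
+  // the conflicting live definition goes away.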
+ SmallVector<SUnit*, 4> Interferences; + typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT; + LRegsMapT LRegsMap; + + /// Topo - A topological ordering for SUnits which permits fast IsReachable + /// and similar queries. + ScheduleDAGTopologicalSort Topo; + + // Hack to keep track of the inverse of FindCallSeqStart without more crazy + // DAG crawling. + DenseMap<SUnit*, SUnit*> CallSeqEndForStart; + +public: + ScheduleDAGRRList(MachineFunction &mf, bool needlatency, + SchedulingPriorityQueue *availqueue, + CodeGenOpt::Level OptLevel) + : ScheduleDAGSDNodes(mf), + NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), + Topo(SUnits, nullptr) { + + const TargetSubtargetInfo &STI = mf.getSubtarget(); + if (DisableSchedCycles || !NeedLatency) + HazardRec = new ScheduleHazardRecognizer(); + else + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); + } + + ~ScheduleDAGRRList() override { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule() override; + + ScheduleHazardRecognizer *getHazardRec() { return HazardRec; } + + /// IsReachable - Checks if SU is reachable from TargetSU. + bool IsReachable(const SUnit *SU, const SUnit *TargetSU) { + return Topo.IsReachable(SU, TargetSU); + } + + /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will + /// create a cycle. + bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) { + return Topo.WillCreateCycle(SU, TargetSU); + } + + /// AddPred - adds a predecessor edge to SUnit SU. + /// This returns true if this is a new predecessor. + /// Updates the topological ordering if required. + void AddPred(SUnit *SU, const SDep &D) { + Topo.AddPred(SU, D.getSUnit()); + SU->addPred(D); + } + + /// RemovePred - removes a predecessor edge from SUnit SU. + /// This returns true if an edge was removed. + /// Updates the topological ordering if required. + void RemovePred(SUnit *SU, const SDep &D) { + Topo.RemovePred(SU, D.getSUnit()); + SU->removePred(D); + } + +private: + bool isReady(SUnit *SU) { + return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || + AvailableQueue->isReady(SU); + } + + void ReleasePred(SUnit *SU, const SDep *PredEdge); + void ReleasePredecessors(SUnit *SU); + void ReleasePending(); + void AdvanceToCycle(unsigned NextCycle); + void AdvancePastStalls(SUnit *SU); + void EmitNode(SUnit *SU); + void ScheduleNodeBottomUp(SUnit*); + void CapturePred(SDep *PredEdge); + void UnscheduleNodeBottomUp(SUnit*); + void RestoreHazardCheckerBottomUp(); + void BacktrackBottomUp(SUnit*, SUnit*); + SUnit *CopyAndMoveSuccessors(SUnit*); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVectorImpl<SUnit*>&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&); + + void releaseInterferences(unsigned Reg = 0); + + SUnit *PickNodeToScheduleBottomUp(); + void ListScheduleBottomUp(); + + /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. + /// Updates the topological ordering if required. + SUnit *CreateNewSUnit(SDNode *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = newSUnit(N); + // Update the topological ordering. + if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// CreateClone - Creates a new SUnit from an existing one. + /// Updates the topological ordering if required. + SUnit *CreateClone(SUnit *N) { + unsigned NumSUnits = SUnits.size(); + SUnit *NewNode = Clone(N); + // Update the topological ordering. 
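+    // A NodeNum at or beyond the old SUnit count means this node was not part
+    // of the existing ordering, so the ordering must be recomputed.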
+ if (NewNode->NodeNum >= NumSUnits) + Topo.InitDAGTopologicalSorting(); + return NewNode; + } + + /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't + /// need actual latency information but the hybrid scheduler does. + bool forceUnitLatencies() const override { + return !NeedLatency; + } +}; +} // end anonymous namespace + +/// GetCostForDef - Looks up the register class and cost for a given definition. +/// Typically this just means looking up the representative register class, +/// but for untyped values (MVT::Untyped) it means inspecting the node's +/// opcode to determine what register class is being generated. +static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, + const TargetLowering *TLI, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI, + unsigned &RegClass, unsigned &Cost, + const MachineFunction &MF) { + MVT VT = RegDefPos.GetValue(); + + // Special handling for untyped values. These values can only come from + // the expansion of custom DAG-to-DAG patterns. + if (VT == MVT::Untyped) { + const SDNode *Node = RegDefPos.GetNode(); + + // Special handling for CopyFromReg of untyped values. + if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); + if (Opcode == TargetOpcode::REG_SEQUENCE) { + unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); + const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Idx = RegDefPos.GetIdx(); + const MCInstrDesc Desc = TII->get(Opcode); + const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF); + RegClass = RC->getID(); + // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a + // better way to determine it. + Cost = 1; + } else { + RegClass = TLI->getRepRegClassFor(VT)->getID(); + Cost = TLI->getRepRegClassCostFor(VT); + } +} + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGRRList::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + CurCycle = 0; + IssueCount = 0; + MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; + NumLiveRegs = 0; + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.reset(new SUnit*[TRI->getNumRegs() + 1]()); + LiveRegGens.reset(new SUnit*[TRI->getNumRegs() + 1]()); + CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); + + // Build the scheduling graph. + BuildSchedGraph(nullptr); + + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) + SUnits[su].dumpAll(this)); + Topo.InitDAGTopologicalSorting(); + + AvailableQueue->initNodes(SUnits); + + HazardRec->Reset(); + + // Execute the actual scheduling loop. + ListScheduleBottomUp(); + + AvailableQueue->releaseState(); + + DEBUG({ + dbgs() << "*** Final schedule ***\n"; + dumpSchedule(); + dbgs() << '\n'; + }); +} + +//===----------------------------------------------------------------------===// +// Bottom-Up Scheduling +//===----------------------------------------------------------------------===// + +/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. 
Add it to +/// the AvailableQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { + SUnit *PredSU = PredEdge->getSUnit(); + +#ifndef NDEBUG + if (PredSU->NumSuccsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + PredSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + --PredSU->NumSuccsLeft; + + if (!forceUnitLatencies()) { + // Updating predecessor's height. This is now the cycle when the + // predecessor can be scheduled without causing a pipeline stall. + PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency()); + } + + // If all the node's successors are scheduled, this node is ready + // to be scheduled. Ignore the special EntrySU node. + if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) { + PredSU->isAvailable = true; + + unsigned Height = PredSU->getHeight(); + if (Height < MinAvailableCycle) + MinAvailableCycle = Height; + + if (isReady(PredSU)) { + AvailableQueue->push(PredSU); + } + // CapturePred and others may have left the node in the pending queue, avoid + // adding it twice. + else if (!PredSU->isPending) { + PredSU->isPending = true; + PendingQueue.push_back(PredSU); + } + } +} + +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + for (const SDValue &Op : N->op_values()) + if (IsChainDependent(Op.getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (const SDValue &Op : N->op_values()) + if (Op.getValueType() == MVT::Other) { + N = Op.getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNested are used in recursion to indcate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. 
+ if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = nullptr; + unsigned BestMaxNest = MaxNest; + for (const SDValue &Op : N->op_values()) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(Op.getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (const SDValue &Op : N->op_values()) + if (Op.getValueType() == MVT::Other) { + N = Op.getNode(); + goto found_chain_operand; + } + return nullptr; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return nullptr; + } +} + +/// Call ReleasePred for each predecessor, then update register live def/gen. +/// Always update LiveRegDefs for a register dependence even if the current SU +/// also defines the register. This effectively create one large live range +/// across a sequence of two-address node. This is important because the +/// entire chain must be scheduled together. Example: +/// +/// flags = (3) add +/// flags = (2) addc flags +/// flags = (1) addc flags +/// +/// results in +/// +/// LiveRegDefs[flags] = 3 +/// LiveRegGens[flags] = 1 +/// +/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid +/// interference on flags. +void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { + // Bottom up: release predecessors + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + ReleasePred(SU, &*I); + if (I->isAssignedRegDep()) { + // This is a physical register dependency and it's impossible or + // expensive to copy the register. Make sure nothing that can + // clobber the register is scheduled between the predecessor and + // this node. + SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef; + assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) && + "interference on register dependence"); + LiveRegDefs[I->getReg()] = I->getSUnit(); + if (!LiveRegGens[I->getReg()]) { + ++NumLiveRegs; + LiveRegGens[I->getReg()] = SU; + } + } + } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. + unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + CallSeqEndForStart[Def] = SU; + + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } +} + +/// Check to see if any of the pending instructions are ready to issue. If +/// so, add them to the available queue. 
+void ScheduleDAGRRList::ReleasePending() { + if (DisableSchedCycles) { + assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); + return; + } + + // If the available queue is empty, it is safe to reset MinAvailableCycle. + if (AvailableQueue->empty()) + MinAvailableCycle = UINT_MAX; + + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + unsigned ReadyCycle = PendingQueue[i]->getHeight(); + if (ReadyCycle < MinAvailableCycle) + MinAvailableCycle = ReadyCycle; + + if (PendingQueue[i]->isAvailable) { + if (!isReady(PendingQueue[i])) + continue; + AvailableQueue->push(PendingQueue[i]); + } + PendingQueue[i]->isPending = false; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } +} + +/// Move the scheduler state forward by the specified number of Cycles. +void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { + if (NextCycle <= CurCycle) + return; + + IssueCount = 0; + AvailableQueue->setCurCycle(NextCycle); + if (!HazardRec->isEnabled()) { + // Bypass lots of virtual calls in case of long latency. + CurCycle = NextCycle; + } + else { + for (; CurCycle != NextCycle; ++CurCycle) { + HazardRec->RecedeCycle(); + } + } + // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the + // available Q to release pending nodes at least once before popping. + ReleasePending(); +} + +/// Move the scheduler state forward until the specified node's dependents are +/// ready and can be scheduled with no resource conflicts. +void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { + if (DisableSchedCycles) + return; + + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + + unsigned ReadyCycle = SU->getHeight(); + + // Bump CurCycle to account for latency. We assume the latency of other + // available instructions may be hidden by the stall (not a full pipe stall). + // This updates the hazard recognizer's cycle before reserving resources for + // this instruction. + AdvanceToCycle(ReadyCycle); + + // Calls are scheduled in their preceding cycle, so don't conflict with + // hazards from instructions after the call. EmitNode will reset the + // scoreboard state before emitting the call. + if (SU->isCall) + return; + + // FIXME: For resource conflicts in very long non-pipelined stages, we + // should probably skip ahead here to avoid useless scoreboard checks. + int Stalls = 0; + while (true) { + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(SU, -Stalls); + + if (HT == ScheduleHazardRecognizer::NoHazard) + break; + + ++Stalls; + } + AdvanceToCycle(CurCycle + Stalls); +} + +/// Record this SUnit in the HazardRecognizer. +/// Does not update CurCycle. +void ScheduleDAGRRList::EmitNode(SUnit *SU) { + if (!HazardRec->isEnabled()) + return; + + // Check for phys reg copy. 
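+  // SUnits created for physical register copies have no SDNode attached, so
+  // there is nothing to report to the hazard recognizer for them.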
+ if (!SU->getNode()) + return; + + switch (SU->getNode()->getOpcode()) { + default: + assert(SU->getNode()->isMachineOpcode() && + "This target-independent node should not be scheduled."); + break; + case ISD::MERGE_VALUES: + case ISD::TokenFactor: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + case ISD::CopyToReg: + case ISD::CopyFromReg: + case ISD::EH_LABEL: + // Noops don't affect the scoreboard state. Copies are likely to be + // removed. + return; + case ISD::INLINEASM: + // For inline asm, clear the pipeline state. + HazardRec->Reset(); + return; + } + if (SU->isCall) { + // Calls are scheduled with their preceding instructions. For bottom-up + // scheduling, clear the pipeline state before emitting. + HazardRec->Reset(); + } + + HazardRec->EmitInstruction(SU); +} + +static void resetVRegCycle(SUnit *SU); + +/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending +/// count of its predecessors. If a predecessor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { + DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + +#ifndef NDEBUG + if (CurCycle < SU->getHeight()) + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); +#endif + + // FIXME: Do not modify node height. It may interfere with + // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the + // node its ready cycle can aid heuristics, and after scheduling it can + // indicate the scheduled cycle. + SU->setHeightToAtLeast(CurCycle); + + // Reserve resources for the scheduled instruction. + EmitNode(SU); + + Sequence.push_back(SU); + + AvailableQueue->scheduledNode(SU); + + // If HazardRec is disabled, and each inst counts as one cycle, then + // advance CurCycle before ReleasePredecessors to avoid useless pushes to + // PendingQueue for schedulers that implement HasReadyFilter. + if (!HazardRec->isEnabled() && AvgIPC < 2) + AdvanceToCycle(CurCycle + 1); + + // Update liveness of predecessors before successors to avoid treating a + // two-address node as a live range def. + ReleasePredecessors(SU); + + // Release all the implicit physical register defs that are live. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + // LiveRegDegs[I->getReg()] != SU when SU is a two-address node. + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[I->getReg()] = nullptr; + LiveRegGens[I->getReg()] = nullptr; + releaseInterferences(I->getReg()); + } + } + // Release the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = nullptr; + LiveRegGens[CallResource] = nullptr; + releaseInterferences(CallResource); + } + } + + resetVRegCycle(SU); + + SU->isScheduled = true; + + // Conditions under which the scheduler should eagerly advance the cycle: + // (1) No available instructions + // (2) All pipelines full, so available instructions must have hazards. 
+  //
+  // If HazardRec is disabled, the cycle was pre-advanced before calling
+  // ReleasePredecessors. In that case, IssueCount should remain 0.
+  //
+  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
+  if (HazardRec->isEnabled() || AvgIPC > 1) {
+    if (SU->getNode() && SU->getNode()->isMachineOpcode())
+      ++IssueCount;
+    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
+        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
+      AdvanceToCycle(CurCycle + 1);
+  }
+}
+
+/// CapturePred - This does the opposite of ReleasePred. Since SU is being
+/// unscheduled, increase the NumSuccsLeft count of its predecessor. Remove
+/// it from the AvailableQueue if necessary.
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+  SUnit *PredSU = PredEdge->getSUnit();
+  if (PredSU->isAvailable) {
+    PredSU->isAvailable = false;
+    if (!PredSU->isPending)
+      AvailableQueue->remove(PredSU);
+  }
+
+  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
+  ++PredSU->NumSuccsLeft;
+}
+
+/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
+/// state and its predecessors' states to reflect the change.
+void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
+  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
+  DEBUG(SU->dump(this));
+
+  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+       I != E; ++I) {
+    CapturePred(&*I);
+    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
+      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
+             "Physical register dependency violated?");
+      --NumLiveRegs;
+      LiveRegDefs[I->getReg()] = nullptr;
+      LiveRegGens[I->getReg()] = nullptr;
+      releaseInterferences(I->getReg());
+    }
+  }
+
+  // Reclaim the special call resource dependence, if this is the beginning
+  // of a call.
+  unsigned CallResource = TRI->getNumRegs();
+  for (const SDNode *SUNode = SU->getNode(); SUNode;
+       SUNode = SUNode->getGluedNode()) {
+    if (SUNode->isMachineOpcode() &&
+        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
+      ++NumLiveRegs;
+      LiveRegDefs[CallResource] = SU;
+      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
+    }
+  }
+
+  // Release the special call resource dependence, if this is the end
+  // of a call.
+  if (LiveRegGens[CallResource] == SU)
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->isMachineOpcode() &&
+          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
+        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+        --NumLiveRegs;
+        LiveRegDefs[CallResource] = nullptr;
+        LiveRegGens[CallResource] = nullptr;
+        releaseInterferences(CallResource);
+      }
+    }
+
+  for (auto &Succ : SU->Succs) {
+    if (Succ.isAssignedRegDep()) {
+      auto Reg = Succ.getReg();
+      if (!LiveRegDefs[Reg])
+        ++NumLiveRegs;
+      // This becomes the nearest def. Note that an earlier def may still be
+      // pending if this is a two-address node.
+      LiveRegDefs[Reg] = SU;
+
+      // Update LiveRegGens only if it was empty before this unscheduling.
+      // This avoids incorrectly overwriting an entry set by a previous run.
+      if (!LiveRegGens[Reg]) {
+        // Find the successor with the lowest height.
+ LiveRegGens[Reg] = Succ.getSUnit(); + for (auto &Succ2 : SU->Succs) { + if (Succ2.isAssignedRegDep() && Succ2.getReg() == Reg && + Succ2.getSUnit()->getHeight() < LiveRegGens[Reg]->getHeight()) + LiveRegGens[Reg] = Succ2.getSUnit(); + } + } + } + } + if (SU->getHeight() < MinAvailableCycle) + MinAvailableCycle = SU->getHeight(); + + SU->setHeightDirty(); + SU->isScheduled = false; + SU->isAvailable = true; + if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { + // Don't make available until backtracking is complete. + SU->isPending = true; + PendingQueue.push_back(SU); + } + else { + AvailableQueue->push(SU); + } + AvailableQueue->unscheduledNode(SU); +} + +/// After backtracking, the hazard checker needs to be restored to a state +/// corresponding the current cycle. +void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() { + HazardRec->Reset(); + + unsigned LookAhead = std::min((unsigned)Sequence.size(), + HazardRec->getMaxLookAhead()); + if (LookAhead == 0) + return; + + std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead); + unsigned HazardCycle = (*I)->getHeight(); + for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) { + SUnit *SU = *I; + for (; SU->getHeight() > HazardCycle; ++HazardCycle) { + HazardRec->RecedeCycle(); + } + EmitNode(SU); + } +} + +/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in +/// BTCycle in order to schedule a specific node. +void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { + SUnit *OldSU = Sequence.back(); + while (true) { + Sequence.pop_back(); + // FIXME: use ready cycle instead of height + CurCycle = OldSU->getHeight(); + UnscheduleNodeBottomUp(OldSU); + AvailableQueue->setCurCycle(CurCycle); + if (OldSU == BtSU) + break; + OldSU = Sequence.back(); + } + + assert(!SU->isSucc(OldSU) && "Something is wrong!"); + + RestoreHazardCheckerBottomUp(); + + ReleasePending(); + + ++NumBacktracks; +} + +static bool isOperandOf(const SUnit *SU, SDNode *N) { + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isOperandOf(N)) + return true; + } + return false; +} + +/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled +/// successors to the newly created node. 
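+/// This is used to break an otherwise unsatisfiable live physical register
+/// dependence: duplicating the definition gives the already-scheduled
+/// successors their own copy of the value.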
+SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { + SDNode *N = SU->getNode(); + if (!N) + return nullptr; + + if (SU->getNode()->getGluedNode()) + return nullptr; + + SUnit *NewSU; + bool TryUnfold = false; + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getSimpleValueType(i); + if (VT == MVT::Glue) + return nullptr; + else if (VT == MVT::Other) + TryUnfold = true; + } + for (const SDValue &Op : N->op_values()) { + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); + if (VT == MVT::Glue) + return nullptr; + } + + if (TryUnfold) { + SmallVector<SDNode*, 2> NewNodes; + if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes)) + return nullptr; + + // unfolding an x86 DEC64m operation results in store, dec, load which + // can't be handled here so quit + if (NewNodes.size() == 3) + return nullptr; + + DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n"); + assert(NewNodes.size() == 2 && "Expected a load folding node!"); + + N = NewNodes[1]; + SDNode *LoadNode = NewNodes[0]; + unsigned NumVals = N->getNumValues(); + unsigned OldNumVals = SU->getNode()->getNumValues(); + for (unsigned i = 0; i != NumVals; ++i) + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i)); + DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1), + SDValue(LoadNode, 1)); + + // LoadNode may already exist. This can happen when there is another + // load from the same location and producing the same type of value + // but it has different alignment or volatileness. + bool isNewLoad = true; + SUnit *LoadSU; + if (LoadNode->getNodeId() != -1) { + LoadSU = &SUnits[LoadNode->getNodeId()]; + isNewLoad = false; + } else { + LoadSU = CreateNewSUnit(LoadNode); + LoadNode->setNodeId(LoadSU->NodeNum); + + InitNumRegDefsLeft(LoadSU); + computeLatency(LoadSU); + } + + SUnit *NewSU = CreateNewSUnit(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NewSU->NodeNum); + + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + NewSU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + NewSU->isCommutable = true; + + InitNumRegDefsLeft(NewSU); + computeLatency(NewSU); + + // Record all the edges to and from the old SU, by category. + SmallVector<SDep, 4> ChainPreds; + SmallVector<SDep, 4> ChainSuccs; + SmallVector<SDep, 4> LoadPreds; + SmallVector<SDep, 4> NodePreds; + SmallVector<SDep, 4> NodeSuccs; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainPreds.push_back(*I); + else if (isOperandOf(I->getSUnit(), LoadNode)) + LoadPreds.push_back(*I); + else + NodePreds.push_back(*I); + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) + ChainSuccs.push_back(*I); + else + NodeSuccs.push_back(*I); + } + + // Now assign edges to the newly-created nodes. 
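+    // Chain and load-address predecessors belong with the load, value
+    // predecessors and successors are rewired to the unfolded node, and
+    // chain successors follow the load.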
+ for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) { + const SDep &Pred = ChainPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) { + const SDep &Pred = LoadPreds[i]; + RemovePred(SU, Pred); + if (isNewLoad) + AddPred(LoadSU, Pred); + } + for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) { + const SDep &Pred = NodePreds[i]; + RemovePred(SU, Pred); + AddPred(NewSU, Pred); + } + for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) { + SDep D = NodeSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + D.setSUnit(NewSU); + AddPred(SuccDep, D); + // Balance register pressure. + if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled + && !D.isCtrl() && NewSU->NumRegDefsLeft > 0) + --NewSU->NumRegDefsLeft; + } + for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) { + SDep D = ChainSuccs[i]; + SUnit *SuccDep = D.getSUnit(); + D.setSUnit(SU); + RemovePred(SuccDep, D); + if (isNewLoad) { + D.setSUnit(LoadSU); + AddPred(SuccDep, D); + } + } + + // Add a data dependency to reflect that NewSU reads the value defined + // by LoadSU. + SDep D(LoadSU, SDep::Data, 0); + D.setLatency(LoadSU->Latency); + AddPred(NewSU, D); + + if (isNewLoad) + AvailableQueue->addNode(LoadSU); + AvailableQueue->addNode(NewSU); + + ++NumUnfolds; + + if (NewSU->NumSuccsLeft == 0) { + NewSU->isAvailable = true; + return NewSU; + } + SU = NewSU; + } + + DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n"); + NewSU = CreateClone(SU); + + // New SUnit has the exact same predecessors. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) + if (!I->isArtificial()) + AddPred(NewSU, *I); + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(NewSU); + AddPred(SuccSU, D); + D.setSUnit(SU); + DelDeps.push_back(std::make_pair(SuccSU, D)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(NewSU); + + ++NumDups; + return NewSU; +} + +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl<SUnit*> &Copies) { + SUnit *CopyFromSU = CreateNewSUnit(nullptr); + CopyFromSU->CopySrcRC = SrcRC; + CopyFromSU->CopyDstRC = DestRC; + + SUnit *CopyToSU = CreateNewSUnit(nullptr); + CopyToSU->CopySrcRC = DestRC; + CopyToSU->CopyDstRC = SrcRC; + + // Only copy scheduled successors. Cut them from old node's successor + // list and move them over. + SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps; + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isArtificial()) + continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU->isScheduled) { + SDep D = *I; + D.setSUnit(CopyToSU); + AddPred(SuccSU, D); + DelDeps.push_back(std::make_pair(SuccSU, *I)); + } + else { + // Avoid scheduling the def-side copy before other successors. 
Otherwise + // we could introduce another physreg interference on the copy and + // continue inserting copies indefinitely. + AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial)); + } + } + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) + RemovePred(DelDeps[i].first, DelDeps[i].second); + + SDep FromDep(SU, SDep::Data, Reg); + FromDep.setLatency(SU->Latency); + AddPred(CopyFromSU, FromDep); + SDep ToDep(CopyFromSU, SDep::Data, 0); + ToDep.setLatency(CopyFromSU->Latency); + AddPred(CopyToSU, ToDep); + + AvailableQueue->updateNode(SU); + AvailableQueue->addNode(CopyFromSU); + AvailableQueue->addNode(CopyToSU); + Copies.push_back(CopyFromSU); + Copies.push_back(CopyToSU); + + ++NumPRCopies; +} + +/// getPhysicalRegisterVT - Returns the ValueType of the physical register +/// definition of the specified node. +/// FIXME: Move to SelectionDAG? +static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, + const TargetInstrInfo *TII) { + unsigned NumRes; + if (N->getOpcode() == ISD::CopyFromReg) { + // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type. + NumRes = 1; + } else { + const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); + assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); + NumRes = MCID.getNumDefs(); + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + if (Reg == *ImpDef) + break; + ++NumRes; + } + } + return N->getSimpleValueType(NumRes); +} + +/// CheckForLiveRegDef - Return true and update live register vector if the +/// specified register def of the specified SUnit clobbers any "live" registers. +static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, + SUnit **LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVectorImpl<unsigned> &LRegs, + const TargetRegisterInfo *TRI) { + for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { + + // Check if Ref is live. + if (!LiveRegDefs[*AliasI]) continue; + + // Allow multiple uses of the same def. + if (LiveRegDefs[*AliasI] == SU) continue; + + // Add Reg to the set of interfering live regs. + if (RegAdded.insert(*AliasI).second) { + LRegs.push_back(*AliasI); + } + } +} + +/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered +/// by RegMask, and add them to LRegs. +static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, + ArrayRef<SUnit*> LiveRegDefs, + SmallSet<unsigned, 4> &RegAdded, + SmallVectorImpl<unsigned> &LRegs) { + // Look at all live registers. Skip Reg0 and the special CallResource. + for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { + if (!LiveRegDefs[i]) continue; + if (LiveRegDefs[i] == SU) continue; + if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue; + if (RegAdded.insert(i).second) + LRegs.push_back(i); + } +} + +/// getNodeRegMask - Returns the register mask attached to an SDNode, if any. +static const uint32_t *getNodeRegMask(const SDNode *N) { + for (const SDValue &Op : N->op_values()) + if (const auto *RegOp = dyn_cast<RegisterMaskSDNode>(Op.getNode())) + return RegOp->getRegMask(); + return nullptr; +} + +/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay +/// scheduling of the given node to satisfy live physical register dependencies. +/// If the specific node is the last one that's available to schedule, do +/// whatever is necessary (i.e. backtracking or cloning) to make it possible. 
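+/// Any interfering physical registers that are found are returned in LRegs.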
+bool ScheduleDAGRRList:: +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) { + if (NumLiveRegs == 0) + return false; + + SmallSet<unsigned, 4> RegAdded; + // If this node would clobber any "live" register, then it's not ready. + // + // If SU is the currently live definition of the same register that it uses, + // then we are free to schedule it. + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU) + CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs.get(), + RegAdded, LRegs, TRI); + } + + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) { + if (Node->getOpcode() == ISD::INLINEASM) { + // Inline asm can clobber physical defs. + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue) + --NumOps; // Ignore the glue operand. + + for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { + unsigned Flags = + cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue(); + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + + ++i; // Skip the ID value. + if (InlineAsm::isRegDefKind(Flags) || + InlineAsm::isRegDefEarlyClobberKind(Flags) || + InlineAsm::isClobberKind(Flags)) { + // Check for def of register or earlyclobber register. + for (; NumVals; --NumVals, ++i) { + unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + } else + i += NumVals; + } + continue; + } + + if (!Node->isMachineOpcode()) + continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && + RegAdded.insert(CallResource).second) + LRegs.push_back(CallResource); + } + } + if (const uint32_t *RegMask = getNodeRegMask(Node)) + CheckForLiveRegDefMasked(SU, RegMask, + makeArrayRef(LiveRegDefs.get(), TRI->getNumRegs()), + RegAdded, LRegs); + + const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); + if (!MCID.ImplicitDefs) + continue; + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); + } + + return !LRegs.empty(); +} + +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVectorImpl<unsigned> &LRegs = LRegsPos->second; + if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. 
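+    // A nonzero NodeQueueId means the node is already sitting in the
+    // AvailableQueue, so only nodes with a zero id are re-pushed here.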
+ if (SU->isAvailable && !SU->NodeQueueId) { + DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + +/// Return a node that can be scheduled in this cycle. Requirements: +/// (1) Ready: latency has been satisfied +/// (2) No Hazards: resources are available +/// (3) No Interferences: may unschedule to break register interferences. +SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { + SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop(); + while (CurSU) { + SmallVector<unsigned, 4> LRegs; + if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) + break; + DEBUG(dbgs() << " Interfering reg " << + (LRegs[0] == TRI->getNumRegs() ? "CallResource" + : TRI->getName(LRegs[0])) + << " SU #" << CurSU->NodeNum << '\n'); + std::pair<LRegsMapT::iterator, bool> LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Interferences are pending"); + // Update the interference with current live regs. + LRegsPair.first->second = LRegs; + } + CurSU = AvailableQueue->pop(); + } + if (CurSU) + return CurSU; + + // All candidates are delayed due to live physical reg dependencies. + // Try backtracking, code duplication, or inserting cross class copies + // to resolve it. + for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { + SUnit *TrySU = Interferences[i]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; + + // Try unscheduling up to the point where it's safe to schedule + // this node. + SUnit *BtSU = nullptr; + unsigned LiveCycle = UINT_MAX; + for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) { + unsigned Reg = LRegs[j]; + if (LiveRegGens[Reg]->getHeight() < LiveCycle) { + BtSU = LiveRegGens[Reg]; + LiveCycle = BtSU->getHeight(); + } + } + if (!WillCreateCycle(TrySU, BtSU)) { + // BacktrackBottomUp mutates Interferences! + BacktrackBottomUp(TrySU, BtSU); + + // Force the current node to be scheduled before the node that + // requires the physical reg dep. + if (BtSU->isAvailable) { + BtSU->isAvailable = false; + if (!BtSU->isPending) + AvailableQueue->remove(BtSU); + } + DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" + << TrySU->NodeNum << ")\n"); + AddPred(TrySU, SDep(BtSU, SDep::Artificial)); + + // If one or more successors has been unscheduled, then the current + // node is no longer available. + if (!TrySU->isAvailable || !TrySU->NodeQueueId) + CurSU = AvailableQueue->pop(); + else { + // Available and in AvailableQueue + AvailableQueue->remove(TrySU); + CurSU = TrySU; + } + // Interferences has been mutated. We must break. + break; + } + } + + if (!CurSU) { + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicate the nodes that produces these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. 
+ SUnit *TrySU = Interferences[0]; + SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU]; + assert(LRegs.size() == 1 && "Can't handle this yet!"); + unsigned Reg = LRegs[0]; + SUnit *LRDef = LiveRegDefs[Reg]; + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is the same as RC, then it must be possible + // copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's possible to + // copy the value but it require cross register class copies and it is + // expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. + SUnit *NewDef = nullptr; + if (DestRC != RC) { + NewDef = CopyAndMoveSuccessors(LRDef); + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical register dependency!"); + } + if (!NewDef) { + // Issue copies, these can be expensive cross register class copies. + SmallVector<SUnit*, 2> Copies; + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum + << " to SU #" << Copies.front()->NodeNum << "\n"); + AddPred(TrySU, SDep(Copies.front(), SDep::Artificial)); + NewDef = Copies.back(); + } + + DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum + << " to SU #" << TrySU->NodeNum << "\n"); + LiveRegDefs[Reg] = NewDef; + AddPred(NewDef, SDep(TrySU, SDep::Artificial)); + TrySU->isAvailable = false; + CurSU = NewDef; + } + assert(CurSU && "Unable to resolve live physical register dependencies!"); + return CurSU; +} + +/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up +/// schedulers. +void ScheduleDAGRRList::ListScheduleBottomUp() { + // Release any predecessors of the special Exit node. + ReleasePredecessors(&ExitSU); + + // Add root to Available queue. + if (!SUnits.empty()) { + SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()]; + assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!"); + RootSU->isAvailable = true; + AvailableQueue->push(RootSU); + } + + // While Available queue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !Interferences.empty()) { + DEBUG(dbgs() << "\nExamining Available:\n"; + AvailableQueue->dump(this)); + + // Pick the best node to schedule taking all constraints into + // consideration. + SUnit *SU = PickNodeToScheduleBottomUp(); + + AdvancePastStalls(SU); + + ScheduleNodeBottomUp(SU); + + while (AvailableQueue->empty() && !PendingQueue.empty()) { + // Advance the cycle to free resources. Skip ahead to the next ready SU. + assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized"); + AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle)); + } + } + + // Reverse the order if it is bottom up. + std::reverse(Sequence.begin(), Sequence.end()); + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/true); +#endif +} + +//===----------------------------------------------------------------------===// +// RegReductionPriorityQueue Definition +//===----------------------------------------------------------------------===// +// +// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers +// to reduce register pressure. 
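Before the priority-queue classes, it may help to recall what a Sethi-Ullman number is: the minimum number of registers needed to evaluate an expression tree without spilling. The sketch below shows the classic tree formulation only; ExprNode and sethiUllman are illustrative names, and CalcNodeSethiUllmanNumber further down adapts the same idea to the scheduling DAG (walking non-chain predecessors, taking the maximum, and adding one per tie).

#include <algorithm>

// Illustrative binary expression tree; not an LLVM type.
struct ExprNode {
  const ExprNode *Left = nullptr;
  const ExprNode *Right = nullptr;
};

// Classic Sethi-Ullman number: registers needed to evaluate the subtree
// rooted at N without spilling.
unsigned sethiUllman(const ExprNode *N) {
  if (!N->Left && !N->Right)
    return 1;                        // A leaf occupies one register.
  if (!N->Right)
    return sethiUllman(N->Left);     // A unary op reuses its operand's register.
  unsigned L = sethiUllman(N->Left);
  unsigned R = sethiUllman(N->Right);
  // Evaluate the heavier subtree first; two equally heavy subtrees need one
  // extra register to hold the first result while the second is computed.
  return L == R ? L + 1 : std::max(L, R);
}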
+// +namespace { +class RegReductionPQBase; + +struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> { + bool isReady(SUnit* SU, unsigned CurCycle) const { return true; } +}; + +#ifndef NDEBUG +template<class SF> +struct reverse_sort : public queue_sort { + SF &SortFunc; + reverse_sort(SF &sf) : SortFunc(sf) {} + + bool operator()(SUnit* left, SUnit* right) const { + // reverse left/right rather than simply !SortFunc(left, right) + // to expose different paths in the comparison logic. + return SortFunc(right, left); + } +}; +#endif // NDEBUG + +/// bu_ls_rr_sort - Priority function for bottom up register pressure +// reduction scheduler. +struct bu_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// src_ls_rr_sort - Priority function for source order scheduler. +struct src_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + src_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// hybrid_ls_rr_sort - Priority function for hybrid scheduler. +struct hybrid_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + hybrid_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism) +// scheduler. +struct ilp_ls_rr_sort : public queue_sort { + enum { + IsBottomUp = true, + HasReadyFilter = false + }; + + RegReductionPQBase *SPQ; + ilp_ls_rr_sort(RegReductionPQBase *spq) + : SPQ(spq) {} + + bool isReady(SUnit *SU, unsigned CurCycle) const; + + bool operator()(SUnit* left, SUnit* right) const; +}; + +class RegReductionPQBase : public SchedulingPriorityQueue { +protected: + std::vector<SUnit*> Queue; + unsigned CurQueueId; + bool TracksRegPressure; + bool SrcOrder; + + // SUnits - The SUnits for the current graph. + std::vector<SUnit> *SUnits; + + MachineFunction &MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const TargetLowering *TLI; + ScheduleDAGRRList *scheduleDAG; + + // SethiUllmanNumbers - The SethiUllman number for each node. + std::vector<unsigned> SethiUllmanNumbers; + + /// RegPressure - Tracking current reg pressure per register class. + /// + std::vector<unsigned> RegPressure; + + /// RegLimit - Tracking the number of allocatable registers per register + /// class. 
+ std::vector<unsigned> RegLimit; + +public: + RegReductionPQBase(MachineFunction &mf, + bool hasReadyFilter, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : SchedulingPriorityQueue(hasReadyFilter), + CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder), + MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) { + if (TracksRegPressure) { + unsigned NumRC = TRI->getNumRegClasses(); + RegLimit.resize(NumRC); + RegPressure.resize(NumRC); + std::fill(RegLimit.begin(), RegLimit.end(), 0); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); + } + } + + void setScheduleDAG(ScheduleDAGRRList *scheduleDag) { + scheduleDAG = scheduleDag; + } + + ScheduleHazardRecognizer* getHazardRec() { + return scheduleDAG->getHazardRec(); + } + + void initNodes(std::vector<SUnit> &sunits) override; + + void addNode(const SUnit *SU) override; + + void updateNode(const SUnit *SU) override; + + void releaseState() override { + SUnits = nullptr; + SethiUllmanNumbers.clear(); + std::fill(RegPressure.begin(), RegPressure.end(), 0); + } + + unsigned getNodePriority(const SUnit *SU) const; + + unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + + return SU->getNode()->getIROrder(); + } + + bool empty() const override { return Queue.empty(); } + + void push(SUnit *U) override { + assert(!U->NodeQueueId && "Node in the queue already"); + U->NodeQueueId = ++CurQueueId; + Queue.push_back(U); + } + + void remove(SUnit *SU) override { + assert(!Queue.empty() && "Queue is empty!"); + assert(SU->NodeQueueId != 0 && "Not in queue!"); + std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), + SU); + if (I != std::prev(Queue.end())) + std::swap(*I, Queue.back()); + Queue.pop_back(); + SU->NodeQueueId = 0; + } + + bool tracksRegPressure() const override { return TracksRegPressure; } + + void dumpRegPressure() const; + + bool HighRegPressure(const SUnit *SU) const; + + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; + + void scheduledNode(SUnit *SU) override; + + void unscheduledNode(SUnit *SU) override; + +protected: + bool canClobber(const SUnit *SU, const SUnit *Op); + void AddPseudoTwoAddrDeps(); + void PrescheduleNodesWithMultipleUses(); + void CalculateSethiUllmanNumbers(); +}; + +template<class SF> +static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { + std::vector<SUnit *>::iterator Best = Q.begin(); + for (std::vector<SUnit *>::iterator I = std::next(Q.begin()), + E = Q.end(); I != E; ++I) + if (Picker(*Best, *I)) + Best = I; + SUnit *V = *Best; + if (Best != std::prev(Q.end())) + std::swap(*Best, Q.back()); + Q.pop_back(); + return V; +} + +template<class SF> +SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { +#ifndef NDEBUG + if (DAG->StressSched) { + reverse_sort<SF> RPicker(Picker); + return popFromQueueImpl(Q, RPicker); + } +#endif + (void)DAG; + return popFromQueueImpl(Q, Picker); +} + +template<class SF> +class RegReductionPriorityQueue : public RegReductionPQBase { + SF Picker; + +public: + RegReductionPriorityQueue(MachineFunction &mf, + bool tracksrp, + bool srcorder, + const TargetInstrInfo *tii, + const TargetRegisterInfo *tri, + const TargetLowering *tli) + : RegReductionPQBase(mf, 
SF::HasReadyFilter, tracksrp, srcorder, + tii, tri, tli), + Picker(this) {} + + bool isBottomUp() const override { return SF::IsBottomUp; } + + bool isReady(SUnit *U) const override { + return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); + } + + SUnit *pop() override { + if (Queue.empty()) return nullptr; + + SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); + V->NodeQueueId = 0; + return V; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump(ScheduleDAG *DAG) const override { + // Emulate pop() without clobbering NodeQueueIds. + std::vector<SUnit*> DumpQueue = Queue; + SF DumpPicker = Picker; + while (!DumpQueue.empty()) { + SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); + dbgs() << "Height " << SU->getHeight() << ": "; + SU->dump(DAG); + } + } +#endif +}; + +typedef RegReductionPriorityQueue<bu_ls_rr_sort> +BURegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<src_ls_rr_sort> +SrcRegReductionPriorityQueue; + +typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> +HybridBURRPriorityQueue; + +typedef RegReductionPriorityQueue<ilp_ls_rr_sort> +ILPBURRPriorityQueue; +} // end anonymous namespace + +//===----------------------------------------------------------------------===// +// Static Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +// Check for special nodes that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + +/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. +/// Smaller number is the higher priority. +static unsigned +CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { + unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; + if (SethiUllmanNumber != 0) + return SethiUllmanNumber; + + unsigned Extra = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); + if (PredSethiUllman > SethiUllmanNumber) { + SethiUllmanNumber = PredSethiUllman; + Extra = 0; + } else if (PredSethiUllman == SethiUllmanNumber) + ++Extra; + } + + SethiUllmanNumber += Extra; + + if (SethiUllmanNumber == 0) + SethiUllmanNumber = 1; + + return SethiUllmanNumber; +} + +/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all +/// scheduling units. +void RegReductionPQBase::CalculateSethiUllmanNumbers() { + SethiUllmanNumbers.assign(SUnits->size(), 0); + + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); +} + +void RegReductionPQBase::addNode(const SUnit *SU) { + unsigned SUSize = SethiUllmanNumbers.size(); + if (SUnits->size() > SUSize) + SethiUllmanNumbers.resize(SUSize*2, 0); + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +void RegReductionPQBase::updateNode(const SUnit *SU) { + SethiUllmanNumbers[SU->NodeNum] = 0; + CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); +} + +// Lower priority means schedule further down. 
For bottom-up scheduling, lower +// priority SUs are scheduled before higher priority SUs. +unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { + assert(SU->NodeNum < SethiUllmanNumbers.size()); + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return 0; + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return 0; + if (SU->NumSuccs == 0 && SU->NumPreds != 0) + // If SU does not have a register use, i.e. it doesn't produce a value + // that would be consumed (e.g. store), then it terminates a chain of + // computation. Give it a large SethiUllman number so it will be + // scheduled right before its predecessors that it doesn't lengthen + // their live ranges. + return 0xffff; + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return 0; +#if 1 + return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? NP : 0; + } + return Priority; +#endif +} + +//===----------------------------------------------------------------------===// +// Register Pressure Tracking +//===----------------------------------------------------------------------===// + +void RegReductionPQBase::dumpRegPressure() const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), + E = TRI->regclass_end(); I != E; ++I) { + const TargetRegisterClass *RC = *I; + unsigned Id = RC->getID(); + unsigned RP = RegPressure[Id]; + if (!RP) continue; + DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / " + << RegLimit[Id] << '\n'); + } +#endif +} + +bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { + if (!TLI) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). 
+ if (PredSU->NumRegDefsLeft == 0) { + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + + if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) + return true; + } + } + return false; +} + +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { + const SDNode *N = SU->getNode(); + + if (!N->isMachineOpcode() || !SU->NumSuccs) + return false; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = N->getSimpleValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + return true; + } + return false; +} + +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { + LiveUses = 0; + int PDiff = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + if (PredSU->getNode()->isMachineOpcode()) + ++LiveUses; + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + MVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + ++PDiff; + } + } + const SDNode *N = SU->getNode(); + + if (!N || !N->isMachineOpcode() || !SU->NumSuccs) + return PDiff; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = N->getSimpleValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + --PDiff; + } + return PDiff; +} + +void RegReductionPQBase::scheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + if (!SU->getNode()) + return; + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). + if (PredSU->NumRegDefsLeft == 0) { + continue; + } + // FIXME: The ScheduleDAG currently loses information about which of a + // node's values is consumed by each dependence. Consequently, if the node + // defines multiple register classes, we don't know which to pressurize + // here. Instead the following loop consumes the register defs in an + // arbitrary order. At least it handles the common case of clustered loads + // to the same class. For precise liveness, each SDep needs to indicate the + // result number. But that tightly couples the ScheduleDAG with the + // SelectionDAG making updates tricky. 
A simpler hack would be to attach a + // value type or register class to SDep. + // + // The most important aspect of register tracking is balancing the increase + // here with the reduction further below. Note that this SU may use multiple + // defs in PredSU. The can't be determined here, but we've already + // compensated by reducing NumRegDefsLeft in PredSU during + // ScheduleDAGSDNodes::AddSchedEdges. + --PredSU->NumRegDefsLeft; + unsigned SkipRegDefs = PredSU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs) + continue; + + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + RegPressure[RCId] += Cost; + break; + } + } + + // We should have this assert, but there may be dead SDNodes that never + // materialize as SUnits, so they don't appear to generate liveness. + //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); + int SkipRegDefs = (int)SU->NumRegDefsLeft; + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { + if (SkipRegDefs > 0) + continue; + unsigned RCId, Cost; + GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF); + if (RegPressure[RCId] < Cost) { + // Register pressure tracking is imprecise. This can happen. But we try + // hard not to let it happen because it likely results in poor scheduling. + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); + RegPressure[RCId] = 0; + } + else { + RegPressure[RCId] -= Cost; + } + } + dumpRegPressure(); +} + +void RegReductionPQBase::unscheduledNode(SUnit *SU) { + if (!TracksRegPressure) + return; + + const SDNode *N = SU->getNode(); + if (!N) return; + + if (!N->isMachineOpcode()) { + if (N->getOpcode() != ISD::CopyToReg) + return; + } else { + unsigned Opc = N->getMachineOpcode(); + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::INSERT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::REG_SEQUENCE || + Opc == TargetOpcode::IMPLICIT_DEF) + return; + } + + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only + // counts data deps. + if (PredSU->NumSuccsLeft != PredSU->Succs.size()) + continue; + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyFromReg) { + MVT VT = PN->getSimpleValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) + continue; + if (POpc == TargetOpcode::EXTRACT_SUBREG || + POpc == TargetOpcode::INSERT_SUBREG || + POpc == TargetOpcode::SUBREG_TO_REG) { + MVT VT = PN->getSimpleValueType(0); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + continue; + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + MVT VT = PN->getSimpleValueType(i); + if (!PN->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) + // Register pressure tracking is imprecise. This can happen. 
+ RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); + } + } + + // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() + // may transfer data dependencies to CopyToReg. + if (SU->NumSuccs && N->isMachineOpcode()) { + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getSimpleValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + } + + dumpRegPressure(); +} + +//===----------------------------------------------------------------------===// +// Dynamic Node Priority for Register Pressure Reduction +//===----------------------------------------------------------------------===// + +/// closestSucc - Returns the scheduled cycle of the successor which is +/// closest to the current cycle. +static unsigned closestSucc(const SUnit *SU) { + unsigned MaxHeight = 0; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain succs + unsigned Height = I->getSUnit()->getHeight(); + // If there are bunch of CopyToRegs stacked up, they should be considered + // to be at the same position. + if (I->getSUnit()->getNode() && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) + Height = closestSucc(I->getSUnit())+1; + if (Height > MaxHeight) + MaxHeight = Height; + } + return MaxHeight; +} + +/// calcMaxScratches - Returns an cost estimate of the worse case requirement +/// for scratch registers, i.e. number of data dependencies. +static unsigned calcMaxScratches(const SUnit *SU) { + unsigned Scratches = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + Scratches++; + } + return Scratches; +} + +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. +static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are +/// CopyToReg to a virtual register. This SU def is probably a liveout and +/// it has no other use. It should be scheduled closer to the terminator. +static bool hasOnlyLiveOutUses(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *SuccSU = I->getSUnit(); + if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { + unsigned Reg = + cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +// Set isVRegCycle for a node with only live in opers and live out uses. 
Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. +static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; + } +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } + } +} + +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". + if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); + return true; + } + } + return false; +} + +// Check for either a dependence (latency) or resource (hazard) stall. +// +// Note: The ScheduleHazardRecognizer interface requires a non-const SU. +static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { + if ((int)SPQ->getCurCycle() < Height) return true; + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return true; + return false; +} + +// Return -1 if left has higher priority, 1 if right has higher priority. +// Return 0 if latency-based priority is equivalent. +static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, + RegReductionPQBase *SPQ) { + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; + + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && + BUHasStall(left, LHeight, SPQ); + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && + BUHasStall(right, RHeight, SPQ); + + // If scheduling one of the node will cause a pipeline stall, delay it. 
+ // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + if (LStall) { + if (!RStall) + return 1; + if (LHeight != RHeight) + return LHeight > RHeight ? 1 : -1; + } else if (RStall) + return -1; + + // If either node is scheduling for latency, sort them by height/depth + // and latency. + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { + // If neither instruction stalls (!LStall && !RStall) and HazardRecognizer + // is enabled, grouping instructions by cycle, then its height is already + // covered so only its depth matters. We also reach this point if both stall + // but have the same height. + if (!SPQ->getHazardRec()->isEnabled()) { + if (LHeight != RHeight) + return LHeight > RHeight ? 1 : -1; + } + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; + } + if (left->Latency != right->Latency) + return left->Latency > right->Latency ? 1 : -1; + } + return 0; +} + +static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + #ifndef NDEBUG + static const char *const PhysRegMsg[] = { " has no physreg", + " defines a physreg" }; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. + unsigned LPriority = SPQ->getNodePriority(left); + unsigned RPriority = SPQ->getNodePriority(right); + + // Be really careful about hoisting call operands above previous calls. + // Only allows it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + + if (LPriority != RPriority) + return LPriority > RPriority; + + // One or both of the nodes are calls and their sethi-ullman numbers are the + // same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } + + // Try schedule def + use closer when Sethi-Ullman numbers are the same. + // e.g. + // t1 = op t2, c1 + // t3 = op t4, c2 + // + // and the following instructions are both ready. + // t2 = op c3 + // t4 = op c4 + // + // Then schedule t2 = op first. + // i.e. 
+ // t4 = op c4 + // t2 = op c3 + // t1 = op t2, c1 + // t3 = op t4, c2 + // + // This creates more short live intervals. + unsigned LDist = closestSucc(left); + unsigned RDist = closestSucc(right); + if (LDist != RDist) + return LDist < RDist; + + // How many registers becomes live when the node is scheduled. + unsigned LScratch = calcMaxScratches(left); + unsigned RScratch = calcMaxScratches(right); + if (LScratch != RScratch) + return LScratch > RScratch; + + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); + + // Do not compare latencies when one or both of the nodes are calls. + if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { + int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + else { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + + if (left->getDepth() != right->getDepth()) + return left->getDepth() < right->getDepth(); + } + + assert(left->NodeQueueId && right->NodeQueueId && + "NodeQueueId cannot be zero"); + return (left->NodeQueueId > right->NodeQueueId); +} + +// Bottom up +bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + return BURRSort(left, right, SPQ); +} + +// Source order, otherwise bottom up. +bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. + if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + + return BURRSort(left, right, SPQ); +} + +// If the time between now and when the instruction will be ready can cover +// the spill code, then avoid adding it to the ready queue. This gives long +// stalls highest priority and allows hoisting across calls. It should also +// speed up processing the available queue. +bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + static const unsigned ReadyDelay = 3; + + if (SPQ->MayReduceRegPressure(SU)) return true; + + if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +// Return true if right should be scheduled with higher priority than left. +bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. + return BURRSort(left, right, SPQ); + + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + // Avoid causing spills. If register pressure is high, schedule for + // register pressure reduction. 
+ if (LHigh && !RHigh) { + DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" + << right->NodeNum << ")\n"); + return true; + } + else if (!LHigh && RHigh) { + DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" + << left->NodeNum << ")\n"); + return false; + } + if (!LHigh && !RHigh) { + int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); + if (result != 0) + return result > 0; + } + return BURRSort(left, right, SPQ); +} + +// Schedule as many instructions in each cycle as possible. So don't make an +// instruction available unless it is ready in the current cycle. +bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { + if (SU->getHeight() > CurCycle) return false; + + if (SPQ->getHazardRec()->getHazardType(SU, 0) + != ScheduleHazardRecognizer::NoHazard) + return false; + + return true; +} + +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. +bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + + if (left->isCall || right->isCall) + // No way to compute latency of calls. 
+ return BURRSort(left, right, SPQ); + + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) + return left->getHeight() > right->getHeight(); + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << right->NodeNum << "): " + << right->getDepth() << "\n"); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) + return left->getHeight() > right->getHeight(); + } + + return BURRSort(left, right, SPQ); +} + +void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure && !SrcOrder) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + +//===----------------------------------------------------------------------===// +// Preschedule for Register Pressure +//===----------------------------------------------------------------------===// + +bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { + if (SU->isTwoAddress) { + unsigned Opc = SU->getNode()->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; + for (unsigned i = 0; i != NumOps; ++i) { + if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { + SDNode *DU = SU->getNode()->getOperand(i).getNode(); + if (DU->getNodeId() != -1 && + Op->OrigNode == &(*SUnits)[DU->getNodeId()]) + return true; + } + } + } + return false; +} + +/// canClobberReachingPhysRegUse - True if SU would clobber one of it's +/// successor's explicit physregs whose definition can reach DepSU. +/// i.e. DepSU should not be scheduled above SU. 
+static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, + ScheduleDAGRRList *scheduleDAG, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + const MCPhysReg *ImpDefs + = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); + const uint32_t *RegMask = getNodeRegMask(SU->getNode()); + if(!ImpDefs && !RegMask) + return false; + + for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); + SI != SE; ++SI) { + SUnit *SuccSU = SI->getSUnit(); + for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), + PE = SuccSU->Preds.end(); PI != PE; ++PI) { + if (!PI->isAssignedRegDep()) + continue; + + if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + + if (ImpDefs) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + // Return true if SU clobbers this physical register use and the + // definition of the register reaches from DepSU. IsReachable queries + // a topological forward sort of the DAG (following the successors). + if (TRI->regsOverlap(*ImpDef, PI->getReg()) && + scheduleDAG->IsReachable(DepSU, PI->getSUnit())) + return true; + } + } + return false; +} + +/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's +/// physical register defs. +static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, + const TargetInstrInfo *TII, + const TargetRegisterInfo *TRI) { + SDNode *N = SuccSU->getNode(); + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + assert(ImpDefs && "Caller should check hasPhysRegDefs"); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (!SUNode->isMachineOpcode()) + continue; + const MCPhysReg *SUImpDefs = + TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); + const uint32_t *SURegMask = getNodeRegMask(SUNode); + if (!SUImpDefs && !SURegMask) + continue; + for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { + MVT VT = N->getSimpleValueType(i); + if (VT == MVT::Glue || VT == MVT::Other) + continue; + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned Reg = ImpDefs[i - NumDefs]; + if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) + return true; + if (!SUImpDefs) + continue; + for (;*SUImpDefs; ++SUImpDefs) { + unsigned SUReg = *SUImpDefs; + if (TRI->regsOverlap(Reg, SUReg)) + return true; + } + } + } + return false; +} + +/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses +/// are not handled well by the general register pressure reduction +/// heuristics. When presented with code like this: +/// +/// N +/// / | +/// / | +/// U store +/// | +/// ... +/// +/// the heuristics tend to push the store up, but since the +/// operand of the store has another use (U), this would increase +/// the length of that other use (the U->N edge). +/// +/// This function transforms code like the above to route U's +/// dependence through the store when possible, like this: +/// +/// N +/// || +/// || +/// store +/// | +/// U +/// | +/// ... +/// +/// This results in the store being scheduled immediately +/// after N, which shortens the U->N live range, reducing +/// register pressure. +/// +void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { + // Visit all the nodes in topological order, working top-down. 
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + // For now, only look at nodes with no data successors, such as stores. + // These are especially important, due to the heuristics in + // getNodePriority for nodes with no data successors. + if (SU->NumSuccs != 0) + continue; + // For now, only look at nodes with exactly one data predecessor. + if (SU->NumPreds != 1) + continue; + // Avoid prescheduling copies to virtual registers, which don't behave + // like other nodes from the perspective of scheduling heuristics. + if (SDNode *N = SU->getNode()) + if (N->getOpcode() == ISD::CopyToReg && + TargetRegisterInfo::isVirtualRegister + (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + continue; + + // Locate the single data predecessor. + SUnit *PredSU = nullptr; + for (SUnit::const_pred_iterator II = SU->Preds.begin(), + EE = SU->Preds.end(); II != EE; ++II) + if (!II->isCtrl()) { + PredSU = II->getSUnit(); + break; + } + assert(PredSU); + + // Don't rewrite edges that carry physregs, because that requires additional + // support infrastructure. + if (PredSU->hasPhysRegDefs) + continue; + // Short-circuit the case where SU is PredSU's only data successor. + if (PredSU->NumSuccs == 1) + continue; + // Avoid prescheduling to copies from virtual registers, which don't behave + // like other nodes from the perspective of scheduling heuristics. + if (SDNode *N = SU->getNode()) + if (N->getOpcode() == ISD::CopyFromReg && + TargetRegisterInfo::isVirtualRegister + (cast<RegisterSDNode>(N->getOperand(1))->getReg())) + continue; + + // Perform checks on the successors of PredSU. + for (SUnit::const_succ_iterator II = PredSU->Succs.begin(), + EE = PredSU->Succs.end(); II != EE; ++II) { + SUnit *PredSuccSU = II->getSUnit(); + if (PredSuccSU == SU) continue; + // If PredSU has another successor with no data successors, for + // now don't attempt to choose either over the other. + if (PredSuccSU->NumSuccs == 0) + goto outer_loop_continue; + // Don't break physical register dependencies. + if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs) + if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI)) + goto outer_loop_continue; + // Don't introduce graph cycles. + if (scheduleDAG->IsReachable(SU, PredSuccSU)) + goto outer_loop_continue; + } + + // Ok, the transformation is safe and the heuristics suggest it is + // profitable. Update the graph. + DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum + << " next to PredSU #" << PredSU->NodeNum + << " to guide scheduling in the presence of multiple uses\n"); + for (unsigned i = 0; i != PredSU->Succs.size(); ++i) { + SDep Edge = PredSU->Succs[i]; + assert(!Edge.isAssignedRegDep()); + SUnit *SuccSU = Edge.getSUnit(); + if (SuccSU != SU) { + Edge.setSUnit(PredSU); + scheduleDAG->RemovePred(SuccSU, Edge); + scheduleDAG->AddPred(SU, Edge); + Edge.setSUnit(SU); + scheduleDAG->AddPred(SuccSU, Edge); + --i; + } + } + outer_loop_continue:; + } +} + +/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses +/// it as a def&use operand. Add a pseudo control edge from it to the other +/// node (if it won't create a cycle) so the two-address one will be scheduled +/// first (lower in the schedule). If both nodes are two-address, favor the +/// one that has a CopyToReg use (more likely to be a loop induction update). +/// If both are two-address, but one is commutable while the other is not +/// commutable, favor the one that's not commutable. 
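As a rough model of the constraint just described (a sketch under simplifying assumptions, not the LLVM implementation that follows): for every value with a tied def&use user, each plain reader of that value should be ordered before the tied user, because the tied instruction overwrites the register and any reader scheduled after it would force a copy. The Use type and orderPlainUsesBeforeTiedUse function are hypothetical names used only for this illustration.

#include <utility>
#include <vector>

// Illustrative model only, not LLVM API: a user of a value either ties the
// value's register (two-address def&use) or merely reads it.
struct Use { unsigned NodeId; bool TiesRegister; };

// Emit (Before, After) ordering constraints so that every plain reader is
// ordered before the tied user: the instruction that overwrites the register
// should be its last reader, otherwise a copy is needed to keep the value
// alive.  This mirrors the intent of the pseudo two-address edges added by
// AddPseudoTwoAddrDeps below.
std::vector<std::pair<unsigned, unsigned>>
orderPlainUsesBeforeTiedUse(const std::vector<Use> &Users) {
  std::vector<std::pair<unsigned, unsigned>> Edges;
  for (const Use &Tied : Users) {
    if (!Tied.TiesRegister)
      continue;
    for (const Use &Reader : Users)
      if (!Reader.TiesRegister)
        Edges.emplace_back(Reader.NodeId, Tied.NodeId);
  }
  return Edges;
}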
+void RegReductionPQBase::AddPseudoTwoAddrDeps() { + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { + SUnit *SU = &(*SUnits)[i]; + if (!SU->isTwoAddress) + continue; + + SDNode *Node = SU->getNode(); + if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) + continue; + + bool isLiveOut = hasOnlyLiveOutUses(SU); + unsigned Opc = Node->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + unsigned NumRes = MCID.getNumDefs(); + unsigned NumOps = MCID.getNumOperands() - NumRes; + for (unsigned j = 0; j != NumOps; ++j) { + if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) + continue; + SDNode *DU = SU->getNode()->getOperand(j).getNode(); + if (DU->getNodeId() == -1) + continue; + const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; + if (!DUSU) continue; + for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), + E = DUSU->Succs.end(); I != E; ++I) { + if (I->isCtrl()) continue; + SUnit *SuccSU = I->getSUnit(); + if (SuccSU == SU) + continue; + // Be conservative. Ignore if nodes aren't at roughly the same + // depth and height. + if (SuccSU->getHeight() < SU->getHeight() && + (SU->getHeight() - SuccSU->getHeight()) > 1) + continue; + // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge + // constrains whatever is using the copy, instead of the copy + // itself. In the case that the copy is coalesced, this + // preserves the intent of the pseudo two-address heurietics. + while (SuccSU->Succs.size() == 1 && + SuccSU->getNode()->isMachineOpcode() && + SuccSU->getNode()->getMachineOpcode() == + TargetOpcode::COPY_TO_REGCLASS) + SuccSU = SuccSU->Succs.front().getSUnit(); + // Don't constrain non-instruction nodes. + if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) + continue; + // Don't constrain nodes with physical register defs if the + // predecessor can clobber them. + if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { + if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) + continue; + } + // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; + // these may be coalesced away. We want them close to their uses. 
+ unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); + if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || + SuccOpc == TargetOpcode::INSERT_SUBREG || + SuccOpc == TargetOpcode::SUBREG_TO_REG) + continue; + if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && + (!canClobber(SuccSU, DUSU) || + (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || + (!SU->isCommutable && SuccSU->isCommutable)) && + !scheduleDAG->IsReachable(SuccSU, SU)) { + DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" + << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); + scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial)); + } + } + } + } +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +llvm::ScheduleDAGSDNodes * +llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + BURegReductionPriorityQueue *PQ = + new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + SrcRegReductionPriorityQueue *PQ = + new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; + + HybridBURRPriorityQueue *PQ = + new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} + +llvm::ScheduleDAGSDNodes * +llvm::createILPListDAGScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetSubtargetInfo &STI = IS->MF->getSubtarget(); + const TargetInstrInfo *TII = STI.getInstrInfo(); + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + const TargetLowering *TLI = IS->TLI; + + ILPBURRPriorityQueue *PQ = + new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); + ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); + PQ->setScheduleDAG(SD); + return SD; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp new file mode 100644 index 0000000..2a6c853 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -0,0 +1,913 @@ +//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements the ScheduleDAG class, which is a base class used by +// scheduling implementation classes. +// +//===----------------------------------------------------------------------===// + +#include "ScheduleDAGSDNodes.h" +#include "InstrEmitter.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(LoadsClustered, "Number of loads clustered together"); + +// This allows the latency-based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. +static cl::opt<int> HighLatencyCycles( + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency'" + "instructions take for targets with no itinerary")); + +ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) + : ScheduleDAG(mf), BB(nullptr), DAG(nullptr), + InstrItins(mf.getSubtarget().getInstrItineraryData()) {} + +/// Run - perform scheduling. +/// +void ScheduleDAGSDNodes::Run(SelectionDAG *dag, MachineBasicBlock *bb) { + BB = bb; + DAG = dag; + + // Clear the scheduler's SUnit DAG. + ScheduleDAG::clearDAG(); + Sequence.clear(); + + // Invoke the target's selection of scheduler. + Schedule(); +} + +/// NewSUnit - Creates a new SUnit and return a ptr to it. +/// +SUnit *ScheduleDAGSDNodes::newSUnit(SDNode *N) { +#ifndef NDEBUG + const SUnit *Addr = nullptr; + if (!SUnits.empty()) + Addr = &SUnits[0]; +#endif + SUnits.emplace_back(N, (unsigned)SUnits.size()); + assert((Addr == nullptr || Addr == &SUnits[0]) && + "SUnits std::vector reallocated on the fly!"); + SUnits.back().OrigNode = &SUnits.back(); + SUnit *SU = &SUnits.back(); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + if (!N || + (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF)) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); + return SU; +} + +SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { + SUnit *SU = newSUnit(Old->getNode()); + SU->OrigNode = Old->OrigNode; + SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; + SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; + SU->isTwoAddress = Old->isTwoAddress; + SU->isCommutable = Old->isCommutable; + SU->hasPhysRegDefs = Old->hasPhysRegDefs; + SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; + SU->SchedulingPref = Old->SchedulingPref; + Old->isCloned = true; + return SU; +} + +/// CheckForPhysRegDependency - Check if the dependency between def and use of +/// a specified operand is a physical register dependency. 
If so, returns the +/// register and the cost of copying the register. +static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, + const TargetRegisterInfo *TRI, + const TargetInstrInfo *TII, + unsigned &PhysReg, int &Cost) { + if (Op != 2 || User->getOpcode() != ISD::CopyToReg) + return; + + unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return; + + unsigned ResNo = User->getOperand(2).getResNo(); + if (Def->getOpcode() == ISD::CopyFromReg && + cast<RegisterSDNode>(Def->getOperand(1))->getReg() == Reg) { + PhysReg = Reg; + } else if (Def->isMachineOpcode()) { + const MCInstrDesc &II = TII->get(Def->getMachineOpcode()); + if (ResNo >= II.getNumDefs() && + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + PhysReg = Reg; + } + + if (PhysReg != 0) { + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, Def->getSimpleValueType(ResNo)); + Cost = RC->getCopyCost(); + } +} + +// Helper for AddGlue to clone node operands. +static void CloneNodeWithValues(SDNode *N, SelectionDAG *DAG, ArrayRef<EVT> VTs, + SDValue ExtraOper = SDValue()) { + SmallVector<SDValue, 8> Ops(N->op_begin(), N->op_end()); + if (ExtraOper.getNode()) + Ops.push_back(ExtraOper); + + SDVTList VTList = DAG->getVTList(VTs); + MachineSDNode::mmo_iterator Begin = nullptr, End = nullptr; + MachineSDNode *MN = dyn_cast<MachineSDNode>(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + + DAG->MorphNodeTo(N, N->getOpcode(), VTList, Ops); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); +} + +static bool AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) { + SDNode *GlueDestNode = Glue.getNode(); + + // Don't add glue from a node to itself. + if (GlueDestNode == N) return false; + + // Don't add a glue operand to something that already uses glue. + if (GlueDestNode && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + return false; + } + // Don't add glue to something that already has a glue value. + if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return false; + + SmallVector<EVT, 4> VTs(N->value_begin(), N->value_end()); + if (AddGlue) + VTs.push_back(MVT::Glue); + + CloneNodeWithValues(N, DAG, VTs, Glue); + + return true; +} + +// Cleanup after unsuccessful AddGlue. Use the standard method of morphing the +// node even though simply shrinking the value list is sufficient. +static void RemoveUnusedGlue(SDNode *N, SelectionDAG *DAG) { + assert((N->getValueType(N->getNumValues() - 1) == MVT::Glue && + !N->hasAnyUseOfValue(N->getNumValues() - 1)) && + "expected an unused glue value"); + + CloneNodeWithValues(N, DAG, + makeArrayRef(N->value_begin(), N->getNumValues() - 1)); +} + +/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them. +/// This function finds loads of the same base and different offsets. If the +/// offsets are not far apart (target specific), it add MVT::Glue inputs and +/// outputs to ensure they are scheduled together and in order. This +/// optimization may benefit some targets by improving cache locality. +void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { + SDNode *Chain = nullptr; + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) + Chain = Node->getOperand(NumOps-1).getNode(); + if (!Chain) + return; + + // Look for other loads of the same chain. 
Find loads that are loading from + // the same base pointer and different offsets. + SmallPtrSet<SDNode*, 16> Visited; + SmallVector<int64_t, 4> Offsets; + DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode. + bool Cluster = false; + SDNode *Base = Node; + // This algorithm requires a reasonably low use count before finding a match + // to avoid uselessly blowing up compile time in large blocks. + unsigned UseCount = 0; + for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + I != E && UseCount < 100; ++I, ++UseCount) { + SDNode *User = *I; + if (User == Node || !Visited.insert(User).second) + continue; + int64_t Offset1, Offset2; + if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || + Offset1 == Offset2) + // FIXME: Should be ok if the addresses are identical. But earlier + // optimizations really should have eliminated one of the loads. + continue; + if (O2SMap.insert(std::make_pair(Offset1, Base)).second) + Offsets.push_back(Offset1); + O2SMap.insert(std::make_pair(Offset2, User)); + Offsets.push_back(Offset2); + if (Offset2 < Offset1) + Base = User; + Cluster = true; + // Reset UseCount to allow more matches. + UseCount = 0; + } + + if (!Cluster) + return; + + // Sort them in increasing order. + std::sort(Offsets.begin(), Offsets.end()); + + // Check if the loads are close enough. + SmallVector<SDNode*, 4> Loads; + unsigned NumLoads = 0; + int64_t BaseOff = Offsets[0]; + SDNode *BaseLoad = O2SMap[BaseOff]; + Loads.push_back(BaseLoad); + for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { + int64_t Offset = Offsets[i]; + SDNode *Load = O2SMap[Offset]; + if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads)) + break; // Stop right here. Ignore loads that are further away. + Loads.push_back(Load); + ++NumLoads; + } + + if (NumLoads == 0) + return; + + // Cluster loads by adding MVT::Glue outputs and inputs. This also + // ensures they are scheduled in order of increasing addresses. + SDNode *Lead = Loads[0]; + SDValue InGlue = SDValue(nullptr, 0); + if (AddGlue(Lead, InGlue, true, DAG)) + InGlue = SDValue(Lead, Lead->getNumValues() - 1); + for (unsigned I = 1, E = Loads.size(); I != E; ++I) { + bool OutGlue = I < E - 1; + SDNode *Load = Loads[I]; + + // If AddGlue fails, we could leave an unused glue value. This should not + // cause any problems. + if (AddGlue(Load, InGlue, OutGlue, DAG)) { + if (OutGlue) + InGlue = SDValue(Load, Load->getNumValues() - 1); + + ++LoadsClustered; + } + else if (!OutGlue && InGlue.getNode()) + RemoveUnusedGlue(InGlue.getNode(), DAG); + } +} + +/// ClusterNodes - Cluster certain nodes which should be scheduled together. +/// +void ScheduleDAGSDNodes::ClusterNodes() { + for (SDNode &NI : DAG->allnodes()) { + SDNode *Node = &NI; + if (!Node || !Node->isMachineOpcode()) + continue; + + unsigned Opc = Node->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + if (MCID.mayLoad()) + // Cluster loads from "near" addresses into combined SUnits. + ClusterNeighboringLoads(Node); + } +} + +void ScheduleDAGSDNodes::BuildSchedUnits() { + // During scheduling, the NodeId field of SDNode is used to map SDNodes + // to their associated SUnits by holding SUnits table indices. A value + // of -1 means the SDNode does not yet have an associated SUnit. + unsigned NumNodes = 0; + for (SDNode &NI : DAG->allnodes()) { + NI.setNodeId(-1); + ++NumNodes; + } + + // Reserve entries in the vector for each of the SUnits we are creating.
This + // ensure that reallocation of the vector won't happen, so SUnit*'s won't get + // invalidated. + // FIXME: Multiply by 2 because we may clone nodes during scheduling. + // This is a temporary workaround. + SUnits.reserve(NumNodes * 2); + + // Add all nodes in depth first order. + SmallVector<SDNode*, 64> Worklist; + SmallPtrSet<SDNode*, 64> Visited; + Worklist.push_back(DAG->getRoot().getNode()); + Visited.insert(DAG->getRoot().getNode()); + + SmallVector<SUnit*, 8> CallSUnits; + while (!Worklist.empty()) { + SDNode *NI = Worklist.pop_back_val(); + + // Add all operands to the worklist unless they've already been added. + for (const SDValue &Op : NI->op_values()) + if (Visited.insert(Op.getNode()).second) + Worklist.push_back(Op.getNode()); + + if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate. + continue; + + // If this node has already been processed, stop now. + if (NI->getNodeId() != -1) continue; + + SUnit *NodeSUnit = newSUnit(NI); + + // See if anything is glued to this node, if so, add them to glued + // nodes. Nodes can have at most one glue input and one glue output. Glue + // is required to be the last operand and result of a node. + + // Scan up to find glued preds. + SDNode *N = NI; + while (N->getNumOperands() && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) { + N = N->getOperand(N->getNumOperands()-1).getNode(); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; + } + + // Scan down to find any glued succs. + N = NI; + while (N->getValueType(N->getNumValues()-1) == MVT::Glue) { + SDValue GlueVal(N, N->getNumValues()-1); + + // There are either zero or one users of the Glue result. + bool HasGlueUse = false; + for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); + UI != E; ++UI) + if (GlueVal.isOperandOf(*UI)) { + HasGlueUse = true; + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + N = *UI; + if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall()) + NodeSUnit->isCall = true; + break; + } + if (!HasGlueUse) break; + } + + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. + if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + + // If there are glue operands involved, N is now the bottom-most node + // of the sequence of nodes that are glued together. + // Update the SUnit. + NodeSUnit->setNode(N); + assert(N->getNodeId() == -1 && "Node already inserted!"); + N->setNodeId(NodeSUnit->NodeNum); + + // Compute NumRegDefsLeft. This must be done before AddSchedEdges. + InitNumRegDefsLeft(NodeSUnit); + + // Assign the Latency field of NodeSUnit using target-provided information. + computeLatency(NodeSUnit); + } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. 
+ SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } +} + +void ScheduleDAGSDNodes::AddSchedEdges() { + const TargetSubtargetInfo &ST = MF.getSubtarget(); + + // Check to see if the scheduler cares about latencies. + bool UnitLatencies = forceUnitLatencies(); + + // Pass 2: add the preds, succs, etc. + for (unsigned su = 0, e = SUnits.size(); su != e; ++su) { + SUnit *SU = &SUnits[su]; + SDNode *MainNode = SU->getNode(); + + if (MainNode->isMachineOpcode()) { + unsigned Opc = MainNode->getMachineOpcode(); + const MCInstrDesc &MCID = TII->get(Opc); + for (unsigned i = 0; i != MCID.getNumOperands(); ++i) { + if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) { + SU->isTwoAddress = true; + break; + } + } + if (MCID.isCommutable()) + SU->isCommutable = true; + } + + // Find all predecessors and successors of the group. + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { + if (N->isMachineOpcode() && + TII->get(N->getMachineOpcode()).getImplicitDefs()) { + SU->hasPhysRegClobbers = true; + unsigned NumUsed = InstrEmitter::CountResults(N); + while (NumUsed != 0 && !N->hasAnyUseOfValue(NumUsed - 1)) + --NumUsed; // Skip over unused values at the end. + if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs()) + SU->hasPhysRegDefs = true; + } + + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + SDNode *OpN = N->getOperand(i).getNode(); + if (isPassiveNode(OpN)) continue; // Not scheduled. + SUnit *OpSU = &SUnits[OpN->getNodeId()]; + assert(OpSU && "Node has no SUnit!"); + if (OpSU == SU) continue; // In the same group. + + EVT OpVT = N->getOperand(i).getValueType(); + assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!"); + bool isChain = OpVT == MVT::Other; + + unsigned PhysReg = 0; + int Cost = 1; + // Determine if this is a physical register dependency. + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); + assert((PhysReg == 0 || !isChain) && + "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0 && !StressSched) + PhysReg = 0; + + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + + SDep Dep = isChain ? SDep(OpSU, SDep::Barrier) + : SDep(OpSU, SDep::Data, PhysReg); + Dep.setLatency(OpLatency); + if (!isChain && !UnitLatencies) { + computeOperandLatency(OpN, N, i, Dep); + ST.adjustSchedDependency(OpSU, SU, Dep); + } + + if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { + // Multiple register uses are combined in the same SUnit. For example, + // we could have a set of glued nodes with all their defs consumed by + // another set of glued nodes. Register pressure tracking sees this as + // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. + --OpSU->NumRegDefsLeft; + } + } + } + } +} + +/// BuildSchedGraph - Build the SUnit graph from the selection dag that we +/// are input. 
This SUnit graph is similar to the SelectionDAG, but +/// excludes nodes that aren't interesting to scheduling, and represents +/// glued together nodes with a single SUnit. +void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); + // Populate the SUnits array. + BuildSchedUnits(); + // Compute all the scheduling dependencies between nodes. + AddSchedEdges(); +} + +// Initialize NumNodeDefs for the current Node's opcode. +void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + + if (!Node->isMachineOpcode()) { + if (Node->getOpcode() == ISD::CopyFromReg) + NodeNumDefs = 1; + else + NodeNumDefs = 0; + return; + } + unsigned POpc = Node->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) { + // No register need be allocated for this. + NodeNumDefs = 0; + return; + } + if (POpc == TargetOpcode::PATCHPOINT && + Node->getValueType(0) == MVT::Other) { + // PATCHPOINT is defined to have one result, but it might really have none + // if we're not using CallingConv::AnyReg. Don't mistake the chain for a + // real definition. + NodeNumDefs = 0; + return; + } + unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs(); + // Some instructions define regs that are not represented in the selection DAG + // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues. + NodeNumDefs = std::min(Node->getNumValues(), NRegDefs); + DefIdx = 0; +} + +// Construct a RegDefIter for this SUnit and find the first valid value. +ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU, + const ScheduleDAGSDNodes *SD) + : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) { + InitNodeNumDefs(); + Advance(); +} + +// Advance to the next valid value defined by the SUnit. +void ScheduleDAGSDNodes::RegDefIter::Advance() { + for (;Node;) { // Visit all glued nodes. + for (;DefIdx < NodeNumDefs; ++DefIdx) { + if (!Node->hasAnyUseOfValue(DefIdx)) + continue; + ValueType = Node->getSimpleValueType(DefIdx); + ++DefIdx; + return; // Found a normal regdef. + } + Node = Node->getGluedNode(); + if (!Node) { + return; // No values left to visit. + } + InitNodeNumDefs(); + } +} + +void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { + assert(SU->NumRegDefsLeft == 0 && "expect a new node"); + for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) { + assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected"); + ++SU->NumRegDefsLeft; + } +} + +void ScheduleDAGSDNodes::computeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + + // Check to see if the scheduler cares about latencies. + if (forceUnitLatencies()) { + SU->Latency = 1; + return; + } + + if (!InstrItins || InstrItins->isEmpty()) { + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; + return; + } + + // Compute the latency for the node. We use the sum of the latencies for + // all nodes glued together into this SUnit. 
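// The early-outs above and the summation just below amount to a small latency
// policy. A condensed, self-contained sketch of the same decision order
// (booleans stand in for the target queries made in this function; the helper
// name is hypothetical):
static unsigned latencyPolicy(bool IsTokenFactor, bool UnitLatencies,
                              bool HasItinerary, bool IsHighLatencyDef,
                              unsigned HighLatency, unsigned GluedSum) {
  if (IsTokenFactor) return 0;   // pure scheduling glue, no machine cycles
  if (UnitLatencies) return 1;   // the scheduler ignores real latencies
  if (!HasItinerary)             // no itinerary data: coarse high/low split
    return IsHighLatencyDef ? HighLatency : 1;
  return GluedSum;               // itinerary path: sum over the glued nodes
}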
+ SU->Latency = 0; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + if (N->isMachineOpcode()) + SU->Latency += TII->getInstrLatency(InstrItins, N); +} + +void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const{ + // Check to see if the scheduler cares about latencies. + if (forceUnitLatencies()) + return; + + if (dep.getKind() != SDep::Data) + return; + + unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); + if (Use->isMachineOpcode()) + // Adjust the use operand index by num of defs. + OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); + int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); + if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && + !BB->succ_empty()) { + unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + // This copy is a liveout value. It is likely coalesced, so reduce the + // latency so not to penalize the def. + // FIXME: need target specific adjustment here? + Latency = (Latency > 1) ? Latency - 1 : 1; + } + if (Latency >= 0) + dep.setLatency(Latency); +} + +void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + if (!SU->getNode()) { + dbgs() << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); + dbgs() << "\n"; + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + dbgs() << " "; + GluedNodes.back()->dump(DAG); + dbgs() << "\n"; + GluedNodes.pop_back(); + } +#endif +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void ScheduleDAGSDNodes::dumpSchedule() const { + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + if (SUnit *SU = Sequence[i]) + SU->dump(this); + else + dbgs() << "**** NOOP ****\n"; + } +} +#endif + +#ifndef NDEBUG +/// VerifyScheduledSequence - Verify that all SUnits were scheduled and that +/// their state is consistent with the nodes listed in Sequence. +/// +void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { + unsigned ScheduledNodes = ScheduleDAG::VerifyScheduledDAG(isBottomUp); + unsigned Noops = 0; + for (unsigned i = 0, e = Sequence.size(); i != e; ++i) + if (!Sequence[i]) + ++Noops; + assert(Sequence.size() - Noops == ScheduledNodes && + "The number of nodes scheduled doesn't match the expected number!"); +} +#endif // NDEBUG + +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + DenseMap<SDValue, unsigned> &VRBaseMap, unsigned Order) { + if (!N->getHasDebugValue()) + return; + + // Opportunistically insert immediate dbg_value uses, i.e. those with source + // order number right after the N. + MachineBasicBlock *BB = Emitter.getBlock(); + MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos(); + ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N); + for (unsigned i = 0, e = DVs.size(); i != e; ++i) { + if (DVs[i]->isInvalidated()) + continue; + unsigned DVOrder = DVs[i]->getOrder(); + if (!Order || DVOrder == ++Order) { + MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap); + if (DbgMI) { + Orders.push_back(std::make_pair(DVOrder, DbgMI)); + BB->insert(InsertPos, DbgMI); + } + DVs[i]->setIsInvalidated(); + } + } +} + +// ProcessSourceNode - Process nodes with source order numbers. 
These are added +// to a vector which EmitSchedule uses to determine how to insert dbg_value +// instructions in the right order. +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap<SDValue, unsigned> &VRBaseMap, + SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders, + SmallSet<unsigned, 8> &Seen) { + unsigned Order = N->getIROrder(); + if (!Order || !Seen.insert(Order).second) { + // Process any valid SDDbgValues even if node does not have any order + // assigned. + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0); + return; + } + + MachineBasicBlock *BB = Emitter.getBlock(); + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + // Fast-isel may have inserted some instructions, in which case the + // BB->back().isPHI() test will not fire when we want it to. + std::prev(Emitter.getInsertPos())->isPHI()) { + // Did not insert any instruction. + Orders.push_back(std::make_pair(Order, (MachineInstr*)nullptr)); + return; + } + + Orders.push_back(std::make_pair(Order, std::prev(Emitter.getInsertPos()))); + ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order); +} + +void ScheduleDAGSDNodes:: +EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos) { + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->CopyDstRC) { + // Copy to physical register. + DenseMap<SUnit*, unsigned>::iterator VRI = VRBaseMap.find(I->getSUnit()); + assert(VRI != VRBaseMap.end() && "Node emitted out of order - late"); + // Find the destination physical register. + unsigned Reg = 0; + for (SUnit::const_succ_iterator II = SU->Succs.begin(), + EE = SU->Succs.end(); II != EE; ++II) { + if (II->isCtrl()) continue; // ignore chain preds + if (II->getReg()) { + Reg = II->getReg(); + break; + } + } + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); + } else { + // Copy from physical register. + assert(I->getReg() && "Unknown physical register!"); + unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC); + bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; + (void)isNew; // Silence compiler warning. + assert(isNew && "Node emitted out of order - early"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); + } + break; + } +} + +/// EmitSchedule - Emit the machine code in scheduled order. Return the new +/// InsertPos and MachineBasicBlock that contains this insertion +/// point. ScheduleDAGSDNodes holds a BB pointer for convenience, but this does +/// not necessarily refer to returned BB. The emitter may split blocks. +MachineBasicBlock *ScheduleDAGSDNodes:: +EmitSchedule(MachineBasicBlock::iterator &InsertPos) { + InstrEmitter Emitter(BB, InsertPos); + DenseMap<SDValue, unsigned> VRBaseMap; + DenseMap<SUnit*, unsigned> CopyVRBaseMap; + SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders; + SmallSet<unsigned, 8> Seen; + bool HasDbg = DAG->hasDebugValues(); + + // If this is the first BB, emit byval parameter dbg_value's. 
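// The emission loop below relies on two conventions set up earlier in this
// file: a null SUnit* in Sequence stands for a noop to be inserted, and an
// SUnit with no SDNode stands for a physical register copy (handled by
// EmitPhysRegCopy); everything else is emitted through InstrEmitter, glued
// nodes first so each glued group stays contiguous.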
+ if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) { + SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator PDE = DAG->ByvalParmDbgEnd(); + for (; PDI != PDE; ++PDI) { + MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); + if (DbgMI) + BB->insert(InsertPos, DbgMI); + } + } + + for (unsigned i = 0, e = Sequence.size(); i != e; i++) { + SUnit *SU = Sequence[i]; + if (!SU) { + // Null SUnit* is a noop. + TII->insertNoop(*Emitter.getBlock(), InsertPos); + continue; + } + + // For pre-regalloc scheduling, create instructions corresponding to the + // SDNode and any glued SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. + EmitPhysRegCopy(SU, CopyVRBaseMap, InsertPos); + continue; + } + + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + SDNode *N = GluedNodes.back(); + Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen); + GluedNodes.pop_back(); + } + Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, + VRBaseMap); + // Remember the source order of the inserted instruction. + if (HasDbg) + ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, + Seen); + } + + // Insert all the dbg_values which have not already been inserted in source + // order sequence. + if (HasDbg) { + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); + + // Sort the source order instructions and use the order to insert debug + // values. + std::sort(Orders.begin(), Orders.end(), less_first()); + + SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); + SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); + // Now emit the rest according to source order. + unsigned LastOrder = 0; + for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { + unsigned Order = Orders[i].first; + MachineInstr *MI = Orders[i].second; + // Insert all SDDbgValue's whose order(s) are before "Order". + if (!MI) + continue; + for (; DI != DE && + (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) { + if ((*DI)->isInvalidated()) + continue; + MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap); + if (DbgMI) { + if (!LastOrder) + // Insert to start of the BB (after PHIs). + BB->insert(BBBegin, DbgMI); + else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. + MachineBasicBlock::iterator Pos = MI; + MI->getParent()->insert(Pos, DbgMI); + } + } + } + LastOrder = Order; + } + // Add trailing DbgValue's before the terminator. FIXME: May want to add + // some of them before one or more conditional branches? + SmallVector<MachineInstr*, 8> DbgMIs; + while (DI != DE) { + if (!(*DI)->isInvalidated()) + if (MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap)) + DbgMIs.push_back(DbgMI); + ++DI; + } + + MachineBasicBlock *InsertBB = Emitter.getBlock(); + MachineBasicBlock::iterator Pos = InsertBB->getFirstTerminator(); + InsertBB->insert(Pos, DbgMIs.begin(), DbgMIs.end()); + } + + InsertPos = Emitter.getInsertPos(); + return Emitter.getBlock(); +} + +/// Return the basic block label. +std::string ScheduleDAGSDNodes::getDAGName() const { + return "sunit-dag." 
+ BB->getFullName(); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h new file mode 100644 index 0000000..5cc8066 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -0,0 +1,180 @@ +//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ScheduleDAGSDNodes class, which implements +// scheduling for an SDNode-based dependency graph. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SCHEDULEDAGSDNODES_H + +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/ScheduleDAG.h" + +namespace llvm { + /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. + /// + /// Edges between SUnits are initially based on edges in the SelectionDAG, + /// and additional edges can be added by the schedulers as heuristics. + /// SDNodes such as Constants, Registers, and a few others that are not + /// interesting to schedulers are not allocated SUnits. + /// + /// SDNodes with MVT::Glue operands are grouped along with the flagged + /// nodes into a single SUnit so that they are scheduled together. + /// + /// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output + /// edges. Physical register dependence information is not carried in + /// the DAG and must be handled explicitly by schedulers. + /// + class ScheduleDAGSDNodes : public ScheduleDAG { + public: + MachineBasicBlock *BB; + SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; + + /// The schedule. Null SUnit*'s represent noop instructions. + std::vector<SUnit*> Sequence; + + explicit ScheduleDAGSDNodes(MachineFunction &mf); + + ~ScheduleDAGSDNodes() override {} + + /// Run - perform scheduling. + /// + void Run(SelectionDAG *dag, MachineBasicBlock *bb); + + /// isPassiveNode - Return true if the node is a non-scheduled leaf. + /// + static bool isPassiveNode(SDNode *Node) { + if (isa<ConstantSDNode>(Node)) return true; + if (isa<ConstantFPSDNode>(Node)) return true; + if (isa<RegisterSDNode>(Node)) return true; + if (isa<RegisterMaskSDNode>(Node)) return true; + if (isa<GlobalAddressSDNode>(Node)) return true; + if (isa<BasicBlockSDNode>(Node)) return true; + if (isa<FrameIndexSDNode>(Node)) return true; + if (isa<ConstantPoolSDNode>(Node)) return true; + if (isa<TargetIndexSDNode>(Node)) return true; + if (isa<JumpTableSDNode>(Node)) return true; + if (isa<ExternalSymbolSDNode>(Node)) return true; + if (isa<MCSymbolSDNode>(Node)) return true; + if (isa<BlockAddressSDNode>(Node)) return true; + if (Node->getOpcode() == ISD::EntryToken || + isa<MDNodeSDNode>(Node)) return true; + return false; + } + + /// NewSUnit - Creates a new SUnit and return a ptr to it. + /// + SUnit *newSUnit(SDNode *N); + + /// Clone - Creates a clone of the specified SUnit. It does not copy the + /// predecessors / successors info nor the temporary scheduling states. + /// + SUnit *Clone(SUnit *N); + + /// BuildSchedGraph - Build the SUnit graph from the selection dag that we + /// are input. 
This SUnit graph is similar to the SelectionDAG, but + /// excludes nodes that aren't interesting to scheduling, and represents + /// flagged together nodes with a single SUnit. + void BuildSchedGraph(AliasAnalysis *AA); + + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. + /// + void InitNumRegDefsLeft(SUnit *SU); + + /// computeLatency - Compute node latency. + /// + virtual void computeLatency(SUnit *SU); + + virtual void computeOperandLatency(SDNode *Def, SDNode *Use, + unsigned OpIdx, SDep& dep) const; + + /// Schedule - Order nodes according to selected style, filling + /// in the Sequence member. + /// + virtual void Schedule() = 0; + + /// VerifyScheduledSequence - Verify that all SUnits are scheduled and + /// consistent with the Sequence of scheduled instructions. + void VerifyScheduledSequence(bool isBottomUp); + + /// EmitSchedule - Insert MachineInstrs into the MachineBasicBlock + /// according to the order specified in Sequence. + /// + virtual MachineBasicBlock* + EmitSchedule(MachineBasicBlock::iterator &InsertPos); + + void dumpNode(const SUnit *SU) const override; + + void dumpSchedule() const; + + std::string getGraphNodeLabel(const SUnit *SU) const override; + + std::string getDAGName() const override; + + virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const; + + /// RegDefIter - In place iteration over the values defined by an + /// SUnit. This does not need copies of the iterator or any other STLisms. + /// The iterator creates itself, rather than being provided by the SchedDAG. + class RegDefIter { + const ScheduleDAGSDNodes *SchedDAG; + const SDNode *Node; + unsigned DefIdx; + unsigned NodeNumDefs; + MVT ValueType; + public: + RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); + + bool IsValid() const { return Node != nullptr; } + + MVT GetValue() const { + assert(IsValid() && "bad iterator"); + return ValueType; + } + + const SDNode *GetNode() const { + return Node; + } + + unsigned GetIdx() const { + return DefIdx-1; + } + + void Advance(); + private: + void InitNodeNumDefs(); + }; + + protected: + /// ForceUnitLatencies - Return true if all scheduling edges should be given + /// a latency value of one. The default is to return false; schedulers may + /// override this as needed. + virtual bool forceUnitLatencies() const { return false; } + + private: + /// ClusterNeighboringLoads - Cluster loads from "near" addresses into + /// combined SUnits. + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); + + /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. + void BuildSchedUnits(); + void AddSchedEdges(); + + void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap, + MachineBasicBlock::iterator InsertPos); + }; +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp new file mode 100644 index 0000000..eee4a4b --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -0,0 +1,279 @@ +//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements a top-down list scheduler, using standard algorithms. +// The basic approach uses a priority queue of available nodes to schedule. +// One at a time, nodes are taken from the priority queue (thus in priority +// order), checked for legality to schedule, and emitted if legal. +// +// Nodes may not be legal to schedule either due to structural hazards (e.g. +// pipeline or resource constraints) or because an input to the instruction has +// not completed execution. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <climits> +using namespace llvm; + +#define DEBUG_TYPE "pre-RA-sched" + +STATISTIC(NumNoops , "Number of noops inserted"); +STATISTIC(NumStalls, "Number of pipeline stalls"); + +static RegisterScheduler + VLIWScheduler("vliw-td", "VLIW scheduler", + createVLIWDAGScheduler); + +namespace { +//===----------------------------------------------------------------------===// +/// ScheduleDAGVLIW - The actual DFA list scheduler implementation. This +/// supports / top-down scheduling. +/// +class ScheduleDAGVLIW : public ScheduleDAGSDNodes { +private: + /// AvailableQueue - The priority queue to use for the available SUnits. + /// + SchedulingPriorityQueue *AvailableQueue; + + /// PendingQueue - This contains all of the instructions whose operands have + /// been issued, but their results are not ready yet (due to the latency of + /// the operation). Once the operands become available, the instruction is + /// added to the AvailableQueue. + std::vector<SUnit*> PendingQueue; + + /// HazardRec - The hazard recognizer to use. + ScheduleHazardRecognizer *HazardRec; + + /// AA - AliasAnalysis for making memory reference queries. + AliasAnalysis *AA; + +public: + ScheduleDAGVLIW(MachineFunction &mf, + AliasAnalysis *aa, + SchedulingPriorityQueue *availqueue) + : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { + const TargetSubtargetInfo &STI = mf.getSubtarget(); + HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this); + } + + ~ScheduleDAGVLIW() override { + delete HazardRec; + delete AvailableQueue; + } + + void Schedule() override; + +private: + void releaseSucc(SUnit *SU, const SDep &D); + void releaseSuccessors(SUnit *SU); + void scheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void listScheduleTopDown(); +}; +} // end anonymous namespace + +/// Schedule - Schedule the DAG using list scheduling. +void ScheduleDAGVLIW::Schedule() { + DEBUG(dbgs() + << "********** List Scheduling BB#" << BB->getNumber() + << " '" << BB->getName() << "' **********\n"); + + // Build the scheduling graph. 
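// Scheduling proceeds in the phases spelled out below: build the SUnit graph,
// hand the SUnits to the priority queue, run the top-down list-scheduling loop
// (which consults the hazard recognizer before issuing each SUnit), then let
// the queue release its state.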
+ BuildSchedGraph(AA); + + AvailableQueue->initNodes(SUnits); + + listScheduleTopDown(); + + AvailableQueue->releaseState(); +} + +//===----------------------------------------------------------------------===// +// Top-Down Scheduling +//===----------------------------------------------------------------------===// + +/// releaseSucc - Decrement the NumPredsLeft count of a successor. Add it to +/// the PendingQueue if the count reaches zero. Also update its cycle bound. +void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { + SUnit *SuccSU = D.getSUnit(); + +#ifndef NDEBUG + if (SuccSU->NumPredsLeft == 0) { + dbgs() << "*** Scheduling failed! ***\n"; + SuccSU->dump(this); + dbgs() << " has been released too many times!\n"; + llvm_unreachable(nullptr); + } +#endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + + --SuccSU->NumPredsLeft; + + SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); + + // If all the node's predecessors are scheduled, this node is ready + // to be scheduled. Ignore the special ExitSU node. + if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { + PendingQueue.push_back(SuccSU); + } +} + +void ScheduleDAGVLIW::releaseSuccessors(SUnit *SU) { + // Top down: release successors. + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); + I != E; ++I) { + assert(!I->isAssignedRegDep() && + "The list-td scheduler doesn't yet support physreg dependencies!"); + + releaseSucc(SU, *I); + } +} + +/// scheduleNodeTopDown - Add the node to the schedule. Decrement the pending +/// count of its successors. If a successor pending count is zero, add it to +/// the Available queue. +void ScheduleDAGVLIW::scheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { + DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); + DEBUG(SU->dump(this)); + + Sequence.push_back(SU); + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); + SU->setDepthToAtLeast(CurCycle); + + releaseSuccessors(SU); + SU->isScheduled = true; + AvailableQueue->scheduledNode(SU); +} + +/// listScheduleTopDown - The main loop of list scheduling for top-down +/// schedulers. +void ScheduleDAGVLIW::listScheduleTopDown() { + unsigned CurCycle = 0; + + // Release any successors of the special Entry node. + releaseSuccessors(&EntrySU); + + // All leaves to AvailableQueue. + for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { + // It is available if it has no predecessors. + if (SUnits[i].Preds.empty()) { + AvailableQueue->push(&SUnits[i]); + SUnits[i].isAvailable = true; + } + } + + // While AvailableQueue is not empty, grab the node with the highest + // priority. If it is not ready put it back. Schedule the node. + std::vector<SUnit*> NotReady; + Sequence.reserve(SUnits.size()); + while (!AvailableQueue->empty() || !PendingQueue.empty()) { + // Check to see if any of the pending instructions are ready to issue. If + // so, add them to the available queue. + for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { + if (PendingQueue[i]->getDepth() == CurCycle) { + AvailableQueue->push(PendingQueue[i]); + PendingQueue[i]->isAvailable = true; + PendingQueue[i] = PendingQueue.back(); + PendingQueue.pop_back(); + --i; --e; + } + else { + assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); + } + } + + // If there are no instructions available, don't try to issue anything, and + // don't advance the hazard recognizer. + if (AvailableQueue->empty()) { + // Reset DFA state. 
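// Every SUnit whose predecessors have all issued waits in PendingQueue until
// CurCycle reaches its depth (its earliest legal issue cycle); when nothing is
// available this cycle, the null-SUnit call below gives the priority queue a
// chance to reset its per-cycle (DFA) state before the cycle counter advances.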
+ AvailableQueue->scheduledNode(nullptr); + ++CurCycle; + continue; + } + + SUnit *FoundSUnit = nullptr; + + bool HasNoopHazards = false; + while (!AvailableQueue->empty()) { + SUnit *CurSUnit = AvailableQueue->pop(); + + ScheduleHazardRecognizer::HazardType HT = + HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); + if (HT == ScheduleHazardRecognizer::NoHazard) { + FoundSUnit = CurSUnit; + break; + } + + // Remember if this is a noop hazard. + HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; + + NotReady.push_back(CurSUnit); + } + + // Add the nodes that aren't ready back onto the available list. + if (!NotReady.empty()) { + AvailableQueue->push_all(NotReady); + NotReady.clear(); + } + + // If we found a node to schedule, do it now. + if (FoundSUnit) { + scheduleNodeTopDown(FoundSUnit, CurCycle); + HazardRec->EmitInstruction(FoundSUnit); + + // If this is a pseudo-op node, we don't want to increment the current + // cycle. + if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! + ++CurCycle; + } else if (!HasNoopHazards) { + // Otherwise, we have a pipeline stall, but no other problem, just advance + // the current cycle and try again. + DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); + HazardRec->AdvanceCycle(); + ++NumStalls; + ++CurCycle; + } else { + // Otherwise, we have no instructions to issue and we have instructions + // that will fault if we don't do this right. This is the case for + // processors without pipeline interlocks and other cases. + DEBUG(dbgs() << "*** Emitting noop\n"); + HazardRec->EmitNoop(); + Sequence.push_back(nullptr); // NULL here means noop + ++NumNoops; + ++CurCycle; + } + } + +#ifndef NDEBUG + VerifyScheduledSequence(/*isBottomUp=*/false); +#endif +} + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +/// createVLIWDAGScheduler - This creates a top-down list scheduler. +ScheduleDAGSDNodes * +llvm::createVLIWDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { + return new ScheduleDAGVLIW(*IS->MF, IS->AA, new ResourcePriorityQueue(IS)); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp new file mode 100644 index 0000000..96bf914 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -0,0 +1,7370 @@ +//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/APSInt.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <cmath> +#include <utility> + +using namespace llvm; + +/// makeVTList - Return an instance of the SDVTList struct initialized with the +/// specified members. +static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { + SDVTList Res = {VTs, NumVTs}; + return Res; +} + +// Default null implementations of the callbacks. +void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} +void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} + +//===----------------------------------------------------------------------===// +// ConstantFPSDNode Class +//===----------------------------------------------------------------------===// + +/// isExactlyValue - We don't rely on operator== working on double values, as +/// it returns true for things that are clearly not equal, like -0.0 and 0.0. +/// As such, this method can be used to do an exact bit-for-bit comparison of +/// two floating point values. +bool ConstantFPSDNode::isExactlyValue(const APFloat& V) const { + return getValueAPF().bitwiseIsEqual(V); +} + +bool ConstantFPSDNode::isValueValidForType(EVT VT, + const APFloat& Val) { + assert(VT.isFloatingPoint() && "Can only convert between FP types"); + + // convert modifies in place, so make a copy. + APFloat Val2 = APFloat(Val); + bool losesInfo; + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, + &losesInfo); + return !losesInfo; +} + +//===----------------------------------------------------------------------===// +// ISD Namespace +//===----------------------------------------------------------------------===// + +/// isBuildVectorAllOnes - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are ~0 or undef. +bool ISD::isBuildVectorAllOnes(const SDNode *N) { + // Look through a bit convert. 
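// (All-ones is judged per element here, not on the whole constant: after type
// legalization a v4i8 splat of ~0 may appear as a BUILD_VECTOR of wider
// integer constants such as 0x00FF, so only the low EltSize bits of each
// element are checked further down.)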
+ while (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + unsigned i = 0, e = N->getNumOperands(); + + // Skip over all of the undef values. + while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) + ++i; + + // Do not accept an all-undef vector. + if (i == e) return false; + + // Do not accept build_vectors that aren't all constants or which have non-~0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target and + // a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all ones, not whether the individual + // constants are. + SDValue NotZero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) { + if (CN->getAPIntValue().countTrailingOnes() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(NotZero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingOnes() < EltSize) + return false; + } else + return false; + + // Okay, we have at least one ~0 value, check to see if the rest match or are + // undefs. Even with the above element type twiddling, this should be OK, as + // the same type legalization should have applied to all the elements. + for (++i; i != e; ++i) + if (N->getOperand(i) != NotZero && + N->getOperand(i).getOpcode() != ISD::UNDEF) + return false; + return true; +} + + +/// isBuildVectorAllZeros - Return true if the specified node is a +/// BUILD_VECTOR where all of the elements are 0 or undef. +bool ISD::isBuildVectorAllZeros(const SDNode *N) { + // Look through a bit convert. + while (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0).getNode(); + + if (N->getOpcode() != ISD::BUILD_VECTOR) return false; + + bool IsAllUndef = true; + for (const SDValue &Op : N->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Op)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } + + // Do not accept an all-undef vector. + if (IsAllUndef) + return false; + return true; +} + +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantSDNode or undef. 
+bool ISD::isBuildVectorOfConstantSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantSDNode>(Op)) + return false; + } + return true; +} + +/// \brief Return true if the specified node is a BUILD_VECTOR node of +/// all ConstantFPSDNode or undef. +bool ISD::isBuildVectorOfConstantFPSDNodes(const SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + for (const SDValue &Op : N->op_values()) { + if (Op.getOpcode() == ISD::UNDEF) + continue; + if (!isa<ConstantFPSDNode>(Op)) + return false; + } + return true; +} + +/// allOperandsUndef - Return true if the node has at least one operand +/// and all operands of the specified node are ISD::UNDEF. +bool ISD::allOperandsUndef(const SDNode *N) { + // Return false if the node has no operands. + // This is "logically inconsistent" with the definition of "all" but + // is probably the desired behavior. + if (N->getNumOperands() == 0) + return false; + + for (const SDValue &Op : N->op_values()) + if (Op.getOpcode() != ISD::UNDEF) + return false; + + return true; +} + +ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) { + switch (ExtType) { + case ISD::EXTLOAD: + return IsFP ? ISD::FP_EXTEND : ISD::ANY_EXTEND; + case ISD::SEXTLOAD: + return ISD::SIGN_EXTEND; + case ISD::ZEXTLOAD: + return ISD::ZERO_EXTEND; + default: + break; + } + + llvm_unreachable("Invalid LoadExtType"); +} + +/// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) +/// when given the operation for (X op Y). +ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { + // To perform this operation, we just need to swap the L and G bits of the + // operation. + unsigned OldL = (Operation >> 2) & 1; + unsigned OldG = (Operation >> 1) & 1; + return ISD::CondCode((Operation & ~6) | // Keep the N, U, E bits + (OldL << 1) | // New G bit + (OldG << 2)); // New L bit. +} + +/// getSetCCInverse - Return the operation corresponding to !(X op Y), where +/// 'op' is a valid SetCC operation. +ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) { + unsigned Operation = Op; + if (isInteger) + Operation ^= 7; // Flip L, G, E bits, but not U. + else + Operation ^= 15; // Flip all of the condition bits. + + if (Operation > ISD::SETTRUE2) + Operation &= ~8; // Don't let N and U bits get set. + + return ISD::CondCode(Operation); +} + + +/// isSignedOp - For an integer comparison, return 1 if the comparison is a +/// signed operation and 2 if the result is an unsigned comparison. Return zero +/// if the operation does not depend on the sign of the input (setne and seteq). +static int isSignedOp(ISD::CondCode Opcode) { + switch (Opcode) { + default: llvm_unreachable("Illegal integer setcc operation!"); + case ISD::SETEQ: + case ISD::SETNE: return 0; + case ISD::SETLT: + case ISD::SETLE: + case ISD::SETGT: + case ISD::SETGE: return 1; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETUGT: + case ISD::SETUGE: return 2; + } +} + +/// getSetCCOrOperation - Return the result of a logical OR between different +/// comparisons of identical values: ((X op1 Y) | (X op2 Y)). This function +/// returns SETCC_INVALID if it is not possible to represent the resultant +/// comparison. 
+ISD::CondCode ISD::getSetCCOrOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed integer setcc with an unsigned integer setcc. + return ISD::SETCC_INVALID; + + unsigned Op = Op1 | Op2; // Combine all of the condition bits. + + // If the N and U bits get set then the resultant comparison DOES suddenly + // care about orderedness, and is true when ordered. + if (Op > ISD::SETTRUE2) + Op &= ~16; // Clear the U bit if the N bit is set. + + // Canonicalize illegal integer setcc's. + if (isInteger && Op == ISD::SETUNE) // e.g. SETUGT | SETULT + Op = ISD::SETNE; + + return ISD::CondCode(Op); +} + +/// getSetCCAndOperation - Return the result of a logical AND between different +/// comparisons of identical values: ((X op1 Y) & (X op2 Y)). This +/// function returns zero if it is not possible to represent the resultant +/// comparison. +ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2, + bool isInteger) { + if (isInteger && (isSignedOp(Op1) | isSignedOp(Op2)) == 3) + // Cannot fold a signed setcc with an unsigned setcc. + return ISD::SETCC_INVALID; + + // Combine all of the condition bits. + ISD::CondCode Result = ISD::CondCode(Op1 & Op2); + + // Canonicalize illegal integer setcc's. + if (isInteger) { + switch (Result) { + default: break; + case ISD::SETUO : Result = ISD::SETFALSE; break; // SETUGT & SETULT + case ISD::SETOEQ: // SETEQ & SETU[LG]E + case ISD::SETUEQ: Result = ISD::SETEQ ; break; // SETUGE & SETULE + case ISD::SETOLT: Result = ISD::SETULT ; break; // SETULT & SETNE + case ISD::SETOGT: Result = ISD::SETUGT ; break; // SETUGT & SETNE + } + } + + return Result; +} + +//===----------------------------------------------------------------------===// +// SDNode Profile Support +//===----------------------------------------------------------------------===// + +/// AddNodeIDOpcode - Add the node opcode to the NodeID data. +/// +static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) { + ID.AddInteger(OpC); +} + +/// AddNodeIDValueTypes - Value type lists are intern'd so we can represent them +/// solely with their pointer. +static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) { + ID.AddPointer(VTList.VTs); +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + ArrayRef<SDValue> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); + } +} + +/// AddNodeIDOperands - Various routines for adding operands to the NodeID data. +/// +static void AddNodeIDOperands(FoldingSetNodeID &ID, + ArrayRef<SDUse> Ops) { + for (auto& Op : Ops) { + ID.AddPointer(Op.getNode()); + ID.AddInteger(Op.getResNo()); + } +} + +/// Add logical or fast math flag values to FoldingSetNodeID value. 
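/// Because the raw flag bits take part in the profile, two otherwise
/// identical binary operators that differ only in their SDNodeFlags are given
/// distinct IDs and are therefore not CSE'd into a single node.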
+static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, + const SDNodeFlags *Flags) { + if (!isBinOpWithFlags(Opcode)) + return; + + unsigned RawFlags = 0; + if (Flags) + RawFlags = Flags->getRawFlags(); + ID.AddInteger(RawFlags); +} + +static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { + AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); +} + +static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, + SDVTList VTList, ArrayRef<SDValue> OpList) { + AddNodeIDOpcode(ID, OpC); + AddNodeIDValueTypes(ID, VTList); + AddNodeIDOperands(ID, OpList); +} + +/// If this is an SDNode with special info, add this info to the NodeID data. +static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { + switch (N->getOpcode()) { + case ISD::TargetExternalSymbol: + case ISD::ExternalSymbol: + case ISD::MCSymbol: + llvm_unreachable("Should only be used on nodes with operands"); + default: break; // Normal nodes don't need extra info. + case ISD::TargetConstant: + case ISD::Constant: { + const ConstantSDNode *C = cast<ConstantSDNode>(N); + ID.AddPointer(C->getConstantIntValue()); + ID.AddBoolean(C->isOpaque()); + break; + } + case ISD::TargetConstantFP: + case ISD::ConstantFP: { + ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue()); + break; + } + case ISD::TargetGlobalAddress: + case ISD::GlobalAddress: + case ISD::TargetGlobalTLSAddress: + case ISD::GlobalTLSAddress: { + const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); + ID.AddPointer(GA->getGlobal()); + ID.AddInteger(GA->getOffset()); + ID.AddInteger(GA->getTargetFlags()); + ID.AddInteger(GA->getAddressSpace()); + break; + } + case ISD::BasicBlock: + ID.AddPointer(cast<BasicBlockSDNode>(N)->getBasicBlock()); + break; + case ISD::Register: + ID.AddInteger(cast<RegisterSDNode>(N)->getReg()); + break; + case ISD::RegisterMask: + ID.AddPointer(cast<RegisterMaskSDNode>(N)->getRegMask()); + break; + case ISD::SRCVALUE: + ID.AddPointer(cast<SrcValueSDNode>(N)->getValue()); + break; + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex()); + break; + case ISD::JumpTable: + case ISD::TargetJumpTable: + ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex()); + ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags()); + break; + case ISD::ConstantPool: + case ISD::TargetConstantPool: { + const ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(N); + ID.AddInteger(CP->getAlignment()); + ID.AddInteger(CP->getOffset()); + if (CP->isMachineConstantPoolEntry()) + CP->getMachineCPVal()->addSelectionDAGCSEId(ID); + else + ID.AddPointer(CP->getConstVal()); + ID.AddInteger(CP->getTargetFlags()); + break; + } + case ISD::TargetIndex: { + const TargetIndexSDNode *TI = cast<TargetIndexSDNode>(N); + ID.AddInteger(TI->getIndex()); + ID.AddInteger(TI->getOffset()); + ID.AddInteger(TI->getTargetFlags()); + break; + } + case ISD::LOAD: { + const LoadSDNode *LD = cast<LoadSDNode>(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + break; + } + case ISD::STORE: { + const StoreSDNode *ST = cast<StoreSDNode>(N); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + break; + } + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case 
ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_STORE: { + const AtomicSDNode *AT = cast<AtomicSDNode>(N); + ID.AddInteger(AT->getMemoryVT().getRawBits()); + ID.AddInteger(AT->getRawSubclassData()); + ID.AddInteger(AT->getPointerInfo().getAddrSpace()); + break; + } + case ISD::PREFETCH: { + const MemSDNode *PF = cast<MemSDNode>(N); + ID.AddInteger(PF->getPointerInfo().getAddrSpace()); + break; + } + case ISD::VECTOR_SHUFFLE: { + const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = N->getValueType(0).getVectorNumElements(); + i != e; ++i) + ID.AddInteger(SVN->getMaskElt(i)); + break; + } + case ISD::TargetBlockAddress: + case ISD::BlockAddress: { + const BlockAddressSDNode *BA = cast<BlockAddressSDNode>(N); + ID.AddPointer(BA->getBlockAddress()); + ID.AddInteger(BA->getOffset()); + ID.AddInteger(BA->getTargetFlags()); + break; + } + } // end switch (N->getOpcode()) + + AddNodeIDFlags(ID, N); + + // Target specific memory nodes could also have address spaces to check. + if (N->isTargetMemoryOpcode()) + ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); +} + +/// AddNodeIDNode - Generic routine for adding a nodes info to the NodeID +/// data. +static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { + AddNodeIDOpcode(ID, N->getOpcode()); + // Add the return value info. + AddNodeIDValueTypes(ID, N->getVTList()); + // Add the operand info. + AddNodeIDOperands(ID, N->ops()); + + // Handle SDNode leafs with special info. + AddNodeIDCustom(ID, N); +} + +/// encodeMemSDNodeFlags - Generic routine for computing a value for use in +/// the CSE map that carries volatility, temporalness, indexing mode, and +/// extension/truncation information. +/// +static inline unsigned +encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, + bool isNonTemporal, bool isInvariant) { + assert((ConvType & 3) == ConvType && + "ConvType may not require more than 2 bits!"); + assert((AM & 7) == AM && + "AM may not require more than 3 bits!"); + return ConvType | + (AM << 2) | + (isVolatile << 5) | + (isNonTemporal << 6) | + (isInvariant << 7); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG Class +//===----------------------------------------------------------------------===// + +/// doNotCSE - Return true if CSE should not be performed for this node. +static bool doNotCSE(SDNode *N) { + if (N->getValueType(0) == MVT::Glue) + return true; // Never CSE anything that produces a flag. + + switch (N->getOpcode()) { + default: break; + case ISD::HANDLENODE: + case ISD::EH_LABEL: + return true; // Never CSE these nodes. + } + + // Check that remaining values produced are not flags. + for (unsigned i = 1, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Glue) + return true; // Never CSE anything that produces a flag. + + return false; +} + +/// RemoveDeadNodes - This method deletes all unreachable nodes in the +/// SelectionDAG. +void SelectionDAG::RemoveDeadNodes() { + // Create a dummy node (which is not added to allnodes), that adds a reference + // to the root node, preventing it from being deleted. + HandleSDNode Dummy(getRoot()); + + SmallVector<SDNode*, 128> DeadNodes; + + // Add all obviously-dead nodes to the DeadNodes worklist. 
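  // "Obviously dead" means use_empty(): no other node refers to any of the
  // node's values. The Dummy handle above keeps the root from qualifying.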
+ for (SDNode &Node : allnodes()) + if (Node.use_empty()) + DeadNodes.push_back(&Node); + + RemoveDeadNodes(DeadNodes); + + // If the root changed (e.g. it was a dead load, update the root). + setRoot(Dummy.getValue()); +} + +/// RemoveDeadNodes - This method deletes the unreachable nodes in the +/// given list, and any nodes that become unreachable as a result. +void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes) { + + // Process the worklist, deleting the nodes and adding their uses to the + // worklist. + while (!DeadNodes.empty()) { + SDNode *N = DeadNodes.pop_back_val(); + + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, nullptr); + + // Take the node out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Next, brutally remove the operand list. This is safe to do, as there are + // no cycles in the graph. + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Operand = Use.getNode(); + Use.set(SDValue()); + + // Now that we removed this operand, see if there are no uses of it left. + if (Operand->use_empty()) + DeadNodes.push_back(Operand); + } + + DeallocateNode(N); + } +} + +void SelectionDAG::RemoveDeadNode(SDNode *N){ + SmallVector<SDNode*, 16> DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + + RemoveDeadNodes(DeadNodes); +} + +void SelectionDAG::DeleteNode(SDNode *N) { + // First take this out of the appropriate CSE map. + RemoveNodeFromCSEMaps(N); + + // Finally, remove uses due to operands of this node, remove from the + // AllNodes list, and delete the node. + DeleteNodeNotInCSEMaps(N); +} + +void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) { + assert(N != AllNodes.begin() && "Cannot delete the entry node!"); + assert(N->use_empty() && "Cannot delete a node that is not dead!"); + + // Drop all of the operands and decrement used node's use counts. + N->DropOperands(); + + DeallocateNode(N); +} + +void SDDbgInfo::erase(const SDNode *Node) { + DbgValMapType::iterator I = DbgValMap.find(Node); + if (I == DbgValMap.end()) + return; + for (auto &Val: I->second) + Val->setIsInvalidated(); + DbgValMap.erase(I); +} + +void SelectionDAG::DeallocateNode(SDNode *N) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + + // Set the opcode to DELETED_NODE to help catch bugs when node + // memory is reallocated. + N->NodeType = ISD::DELETED_NODE; + + NodeAllocator.Deallocate(AllNodes.remove(N)); + + // If any of the SDDbgValue nodes refer to this SDNode, invalidate + // them and forget about that node. + DbgInfo->erase(N); +} + +#ifndef NDEBUG +/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid. 
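/// Only BUILD_PAIR and BUILD_VECTOR are given structural checks here; every
/// other opcode falls through unchecked.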
+static void VerifySDNode(SDNode *N) { + switch (N->getOpcode()) { + default: + break; + case ISD::BUILD_PAIR: { + EVT VT = N->getValueType(0); + assert(N->getNumValues() == 1 && "Too many results!"); + assert(!VT.isVector() && (VT.isInteger() || VT.isFloatingPoint()) && + "Wrong return type!"); + assert(N->getNumOperands() == 2 && "Wrong number of operands!"); + assert(N->getOperand(0).getValueType() == N->getOperand(1).getValueType() && + "Mismatched operand types!"); + assert(N->getOperand(0).getValueType().isInteger() == VT.isInteger() && + "Wrong operand type!"); + assert(VT.getSizeInBits() == 2 * N->getOperand(0).getValueSizeInBits() && + "Wrong return type size"); + break; + } + case ISD::BUILD_VECTOR: { + assert(N->getNumValues() == 1 && "Too many results!"); + assert(N->getValueType(0).isVector() && "Wrong return type!"); + assert(N->getNumOperands() == N->getValueType(0).getVectorNumElements() && + "Wrong number of operands!"); + EVT EltVT = N->getValueType(0).getVectorElementType(); + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ++I) { + assert((I->getValueType() == EltVT || + (EltVT.isInteger() && I->getValueType().isInteger() && + EltVT.bitsLE(I->getValueType()))) && + "Wrong operand type!"); + assert(I->getValueType() == N->getOperand(0).getValueType() && + "Operands must all have the same type"); + } + break; + } + } +} +#endif // NDEBUG + +/// \brief Insert a newly allocated node into the DAG. +/// +/// Handles insertion into the all nodes list and CSE map, as well as +/// verification and other common operations when a new node is allocated. +void SelectionDAG::InsertNode(SDNode *N) { + AllNodes.push_back(N); +#ifndef NDEBUG + N->PersistentId = NextPersistentId++; + VerifySDNode(N); +#endif +} + +/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that +/// correspond to it. This is useful when we're about to delete or repurpose +/// the node. We don't want future request for structurally identical nodes +/// to return N anymore. +bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) { + bool Erased = false; + switch (N->getOpcode()) { + case ISD::HANDLENODE: return false; // noop. + case ISD::CONDCODE: + assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] && + "Cond code doesn't exist!"); + Erased = CondCodeNodes[cast<CondCodeSDNode>(N)->get()] != nullptr; + CondCodeNodes[cast<CondCodeSDNode>(N)->get()] = nullptr; + break; + case ISD::ExternalSymbol: + Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol()); + break; + case ISD::TargetExternalSymbol: { + ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N); + Erased = TargetExternalSymbols.erase( + std::pair<std::string,unsigned char>(ESN->getSymbol(), + ESN->getTargetFlags())); + break; + } + case ISD::MCSymbol: { + auto *MCSN = cast<MCSymbolSDNode>(N); + Erased = MCSymbols.erase(MCSN->getMCSymbol()); + break; + } + case ISD::VALUETYPE: { + EVT VT = cast<VTSDNode>(N)->getVT(); + if (VT.isExtended()) { + Erased = ExtendedValueTypeNodes.erase(VT); + } else { + Erased = ValueTypeNodes[VT.getSimpleVT().SimpleTy] != nullptr; + ValueTypeNodes[VT.getSimpleVT().SimpleTy] = nullptr; + } + break; + } + default: + // Remove it from the CSE Map. 
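    // Anything not kept in one of the side tables above (condition codes,
    // external symbols, MCSymbols, VALUETYPE nodes) lives in the general
    // CSEMap, keyed by the FoldingSetNodeID profile built in AddNodeIDNode.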
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!"); + assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!"); + Erased = CSEMap.RemoveNode(N); + break; + } +#ifndef NDEBUG + // Verify that the node was actually in one of the CSE maps, unless it has a + // flag result (which cannot be CSE'd) or is one of the special cases that are + // not subject to CSE. + if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue && + !N->isMachineOpcode() && !doNotCSE(N)) { + N->dump(this); + dbgs() << "\n"; + llvm_unreachable("Node is not in map!"); + } +#endif + return Erased; +} + +/// AddModifiedNodeToCSEMaps - The specified node has been removed from the CSE +/// maps and modified in place. Add it back to the CSE maps, unless an identical +/// node already exists, in which case transfer all its users to the existing +/// node. This transfer can potentially trigger recursive merging. +/// +void +SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) { + // For node types that aren't CSE'd, just act as if no identical node + // already exists. + if (!doNotCSE(N)) { + SDNode *Existing = CSEMap.GetOrInsertNode(N); + if (Existing != N) { + // If there was already an existing matching node, use ReplaceAllUsesWith + // to replace the dead one with the existing one. This can cause + // recursive merging of other unrelated nodes down the line. + ReplaceAllUsesWith(N, Existing); + + // N is now dead. Inform the listeners and delete it. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeDeleted(N, Existing); + DeleteNodeNotInCSEMaps(N); + return; + } + } + + // If the node doesn't already exist, we updated it. Inform listeners. + for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next) + DUL->NodeUpdated(N); +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + SDValue Ops[] = { Op }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + return Node; +} + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. +SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, + SDValue Op1, SDValue Op2, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + SDValue Ops[] = { Op1, Op2 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + return Node; +} + + +/// FindModifiedNodeSlot - Find a slot for the specified node if its operands +/// were replaced with those specified. If this node is never memoized, +/// return null, otherwise return a pointer to the slot it would take. If a +/// node already exists with these operands, the slot will be non-null. 
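/// On a miss, InsertPos identifies the bucket position, so the caller can
/// later hand it back to CSEMap.InsertNode() without repeating the hash
/// lookup.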
+SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, + void *&InsertPos) { + if (doNotCSE(N)) + return nullptr; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); + AddNodeIDCustom(ID, N); + SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + return Node; +} + +/// getEVTAlignment - Compute the default alignment value for the +/// given type. +/// +unsigned SelectionDAG::getEVTAlignment(EVT VT) const { + Type *Ty = VT == MVT::iPTR ? + PointerType::get(Type::getInt8Ty(*getContext()), 0) : + VT.getTypeForEVT(*getContext()); + + return getDataLayout().getABITypeAlignment(Ty); +} + +// EntryNode could meaningfully have debug info if we can find it... +SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) + : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(nullptr) { + InsertNode(&EntryNode); + DbgInfo = new SDDbgInfo(); +} + +void SelectionDAG::init(MachineFunction &mf) { + MF = &mf; + TLI = getSubtarget().getTargetLowering(); + TSI = getSubtarget().getSelectionDAGInfo(); + Context = &mf.getFunction()->getContext(); +} + +SelectionDAG::~SelectionDAG() { + assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); + allnodes_clear(); + delete DbgInfo; +} + +void SelectionDAG::allnodes_clear() { + assert(&*AllNodes.begin() == &EntryNode); + AllNodes.remove(AllNodes.begin()); + while (!AllNodes.empty()) + DeallocateNode(&AllNodes.front()); +#ifndef NDEBUG + NextPersistentId = 0; +#endif +} + +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue N1, + SDValue N2, + const SDNodeFlags *Flags) { + if (isBinOpWithFlags(Opcode)) { + // If no flags were passed in, use a default flags object. + SDNodeFlags F; + if (Flags == nullptr) + Flags = &F; + + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2, *Flags); + + return FN; + } + + BinarySDNode *N = new (NodeAllocator) + BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; + case ISD::Constant: + case ISD::ConstantFP: + llvm_unreachable("Querying for Constant and ConstantFP nodes requires " + "debug location. Use another overload."); + } + } + return N; +} + +SDNode *SelectionDAG::FindNodeOrInsertPos(const FoldingSetNodeID &ID, + DebugLoc DL, void *&InsertPos) { + SDNode *N = CSEMap.FindNodeOrInsertPos(ID, InsertPos); + if (N) { + switch (N->getOpcode()) { + default: break; // Process only regular (non-target) constant nodes. + case ISD::Constant: + case ISD::ConstantFP: + // Erase debug location from the node if the node is used at several + // different places to do not propagate one location to all uses as it + // leads to incorrect debug info. 
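      // That is: when CSE returns an existing constant that was created with
      // a different DebugLoc than the one now requested, the shared node gets
      // an empty location rather than either caller's location.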
+ if (N->getDebugLoc() != DL) + N->setDebugLoc(DebugLoc()); + break; + } + } + return N; +} + +void SelectionDAG::clear() { + allnodes_clear(); + OperandAllocator.Reset(); + CSEMap.clear(); + + ExtendedValueTypeNodes.clear(); + ExternalSymbols.clear(); + TargetExternalSymbols.clear(); + MCSymbols.clear(); + std::fill(CondCodeNodes.begin(), CondCodeNodes.end(), + static_cast<CondCodeSDNode*>(nullptr)); + std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(), + static_cast<SDNode*>(nullptr)); + + EntryNode.UseList = nullptr; + InsertNode(&EntryNode); + Root = getEntryNode(); + DbgInfo->clear(); +} + +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ANY_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::SIGN_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { + return VT.bitsGT(Op.getValueType()) ? + getNode(ISD::ZERO_EXTEND, DL, VT, Op) : + getNode(ISD::TRUNCATE, DL, VT, Op); +} + +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { + if (VT.bitsLE(Op.getValueType())) + return getNode(ISD::TRUNCATE, SL, VT, Op); + + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); + return getNode(TLI->getExtendForContent(BType), SL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(!VT.isVector() && + "getZeroExtendInReg should use the vector element type instead of " + "the vector type!"); + if (Op.getValueType() == VT) return Op; + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + APInt Imm = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + return getNode(ISD::AND, DL, Op.getValueType(), Op, + getConstant(Imm, DL, Op.getValueType())); +} + +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return 
getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +/// getNOT - Create a bitwise NOT operation as (XOR Val, -1). +/// +SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue NegOne = + getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, VT); + return getNode(ISD::XOR, DL, VT, Val, NegOne); +} + +SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { + EVT EltVT = VT.getScalarType(); + SDValue TrueValue; + switch (TLI->getBooleanContents(VT)) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = getConstant(1, DL, VT); + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, + VT); + break; + } + return getNode(ISD::XOR, DL, VT, Val, TrueValue); +} + +SDValue SelectionDAG::getConstant(uint64_t Val, SDLoc DL, EVT VT, bool isT, + bool isO) { + EVT EltVT = VT.getScalarType(); + assert((EltVT.getSizeInBits() >= 64 || + (uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) && + "getConstant with a uint64_t value that doesn't fit in the type!"); + return getConstant(APInt(EltVT.getSizeInBits(), Val), DL, VT, isT, isO); +} + +SDValue SelectionDAG::getConstant(const APInt &Val, SDLoc DL, EVT VT, bool isT, + bool isO) +{ + return getConstant(*ConstantInt::get(*Context, Val), DL, VT, isT, isO); +} + +SDValue SelectionDAG::getConstant(const ConstantInt &Val, SDLoc DL, EVT VT, + bool isT, bool isO) { + assert(VT.isInteger() && "Cannot create FP integer constant!"); + + EVT EltVT = VT.getScalarType(); + const ConstantInt *Elt = &Val; + + // In some cases the vector type is legal but the element type is illegal and + // needs to be promoted, for example v8i8 on ARM. In this case, promote the + // inserted value (the type does not need to match the vector element type). + // Any extra bits introduced will be truncated away. + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypePromoteInteger) { + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); + Elt = ConstantInt::get(*getContext(), NewVal); + } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. 
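    // E.g. for the v2i64-on-MIPS32 case above this checks that v4i32
    // (4 x 32 bits) covers exactly the same 128 bits as the requested v2i64.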
+ assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector<SDValue, 2> EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), DL, + ViaEltVT, isT, isO)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (getDataLayout().isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. + + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + Ops)); + return Result; + } + + assert(Elt->getBitWidth() == EltVT.getSizeInBits() && + "APInt size does not match type size!"); + unsigned Opc = isT ? ISD::TargetConstant : ISD::Constant; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + ID.AddPointer(Elt); + ID.AddBoolean(isO); + void *IP = nullptr; + SDNode *N = nullptr; + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantSDNode(isT, isO, Elt, DL.getDebugLoc(), + EltVT); + CSEMap.InsertNode(N, IP); + InsertNode(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); + } + return Result; +} + +SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, SDLoc DL, bool isTarget) { + return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget); +} + +SDValue SelectionDAG::getConstantFP(const APFloat& V, SDLoc DL, EVT VT, + bool isTarget) { + return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget); +} + +SDValue SelectionDAG::getConstantFP(const ConstantFP& V, SDLoc DL, EVT VT, + bool isTarget){ + assert(VT.isFloatingPoint() && "Cannot create integer FP constant!"); + + EVT EltVT = VT.getScalarType(); + + // Do the map lookup using the actual bit pattern for the floating point + // value, so that we don't have problems with 0.0 comparing equal to -0.0, and + // we don't have issues with SNANs. + unsigned Opc = isTarget ? 
ISD::TargetConstantFP : ISD::ConstantFP; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(EltVT), None); + ID.AddPointer(&V); + void *IP = nullptr; + SDNode *N = nullptr; + if ((N = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))) + if (!VT.isVector()) + return SDValue(N, 0); + + if (!N) { + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, DL.getDebugLoc(), + EltVT); + CSEMap.InsertNode(N, IP); + InsertNode(N); + } + + SDValue Result(N, 0); + if (VT.isVector()) { + SmallVector<SDValue, 8> Ops; + Ops.assign(VT.getVectorNumElements(), Result); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Ops); + } + return Result; +} + +SDValue SelectionDAG::getConstantFP(double Val, SDLoc DL, EVT VT, + bool isTarget) { + EVT EltVT = VT.getScalarType(); + if (EltVT==MVT::f32) + return getConstantFP(APFloat((float)Val), DL, VT, isTarget); + else if (EltVT==MVT::f64) + return getConstantFP(APFloat(Val), DL, VT, isTarget); + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || + EltVT==MVT::f16) { + bool ignored; + APFloat apf = APFloat(Val); + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + &ignored); + return getConstantFP(apf, DL, VT, isTarget); + } else + llvm_unreachable("Unsupported type in getConstantFP"); +} + +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, + EVT VT, int64_t Offset, + bool isTargetGA, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTargetGA) && + "Cannot set target flags on target-independent globals"); + + // Truncate (with sign-extension) the offset value to the pointer size. + unsigned BitWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + if (BitWidth < 64) + Offset = SignExtend64(Offset, BitWidth); + + unsigned Opc; + if (GV->isThreadLocal()) + Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; + else + Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddPointer(GV); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + ID.AddInteger(GV->getType()->getAddressSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), + DL.getDebugLoc(), GV, VT, + Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { + unsigned Opc = isTarget ? ISD::TargetFrameIndex : ISD::FrameIndex; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(FI); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent jump tables"); + unsigned Opc = isTarget ? 
ISD::TargetJumpTable : ISD::JumpTable; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(JTI); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + ID.AddPointer(C); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, + unsigned Alignment, int Offset, + bool isTarget, + unsigned char TargetFlags) { + assert((TargetFlags == 0 || isTarget) && + "Cannot set target flags on target-independent globals"); + if (Alignment == 0) + Alignment = getDataLayout().getPrefTypeAlignment(C->getType()); + unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddInteger(Alignment); + ID.AddInteger(Offset); + C->addSelectionDAGCSEId(ID); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset, + unsigned char TargetFlags) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None); + ID.AddInteger(Index); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = + new (NodeAllocator) TargetIndexSDNode(Index, VT, Offset, TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), None); + ID.AddPointer(MBB); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getValueType(EVT VT) { + if (VT.isSimple() && (unsigned)VT.getSimpleVT().SimpleTy >= + ValueTypeNodes.size()) + ValueTypeNodes.resize(VT.getSimpleVT().SimpleTy+1); + + SDNode *&N = VT.isExtended() ? 
+ ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; + + if (N) return SDValue(N, 0); + N = new (NodeAllocator) VTSDNode(VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { + SDNode *&N = ExternalSymbols[Sym]; + if (N) return SDValue(N, 0); + N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) { + SDNode *&N = MCSymbols[Sym]; + if (N) + return SDValue(N, 0); + N = new (NodeAllocator) MCSymbolSDNode(Sym, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, + unsigned char TargetFlags) { + SDNode *&N = + TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, + TargetFlags)]; + if (N) return SDValue(N, 0); + N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { + if ((unsigned)Cond >= CondCodeNodes.size()) + CondCodeNodes.resize(Cond+1); + + if (!CondCodeNodes[Cond]) { + CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); + CondCodeNodes[Cond] = N; + InsertNode(N); + } + + return SDValue(CondCodeNodes[Cond], 0); +} + +// commuteShuffle - swaps the values of N1 and N2, and swaps all indices in +// the shuffle mask M that point at N1 to point at N2, and indices that point +// N2 to point at N1. +static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl<int> &M) { + std::swap(N1, N2); + ShuffleVectorSDNode::commuteMask(M); +} + +SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, + SDValue N2, const int *Mask) { + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); + + // Canonicalize shuffle undef, undef -> undef + if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // Validate that all indices in Mask are within the range of the elements + // input to the shuffle. + unsigned NElts = VT.getVectorNumElements(); + SmallVector<int, 8> MaskVec; + for (unsigned i = 0; i != NElts; ++i) { + assert(Mask[i] < (int)(NElts * 2) && "Index out of range"); + MaskVec.push_back(Mask[i]); + } + + // Canonicalize shuffle v, v -> v, undef + if (N1 == N2) { + N2 = getUNDEF(VT); + for (unsigned i = 0; i != NElts; ++i) + if (MaskVec[i] >= (int)NElts) MaskVec[i] -= NElts; + } + + // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. + if (N1.getOpcode() == ISD::UNDEF) + commuteShuffle(N1, N2, MaskVec); + + // If shuffling a splat, try to blend the splat instead. We do this here so + // that even when this arises during lowering we don't have to re-handle it. + auto BlendSplat = [&](BuildVectorSDNode *BV, int Offset) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + if (!Splat) + return; + + for (int i = 0; i < (int)NElts; ++i) { + if (MaskVec[i] < Offset || MaskVec[i] >= (Offset + (int)NElts)) + continue; + + // If this input comes from undef, mark it as such. + if (UndefElements[MaskVec[i] - Offset]) { + MaskVec[i] = -1; + continue; + } + + // If we can blend a non-undef lane, use that instead. 
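      // Every defined lane of a splat holds the same value, so lane i of the
      // splatted input can stand in for whichever lane the mask referenced.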
+ if (!UndefElements[i]) + MaskVec[i] = i + Offset; + } + }; + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + BlendSplat(N1BV, 0); + if (auto *N2BV = dyn_cast<BuildVectorSDNode>(N2)) + BlendSplat(N2BV, NElts); + + // Canonicalize all index into lhs, -> shuffle lhs, undef + // Canonicalize all index into rhs, -> shuffle rhs, undef + bool AllLHS = true, AllRHS = true; + bool N2Undef = N2.getOpcode() == ISD::UNDEF; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= (int)NElts) { + if (N2Undef) + MaskVec[i] = -1; + else + AllLHS = false; + } else if (MaskVec[i] >= 0) { + AllRHS = false; + } + } + if (AllLHS && AllRHS) + return getUNDEF(VT); + if (AllLHS && !N2Undef) + N2 = getUNDEF(VT); + if (AllRHS) { + N1 = getUNDEF(VT); + commuteShuffle(N1, N2, MaskVec); + } + // Reset our undef status after accounting for the mask. + N2Undef = N2.getOpcode() == ISD::UNDEF; + // Re-check whether both sides ended up undef. + if (N1.getOpcode() == ISD::UNDEF && N2Undef) + return getUNDEF(VT); + + // If Identity shuffle return that node. + bool Identity = true, AllSame = true; + for (unsigned i = 0; i != NElts; ++i) { + if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; + if (MaskVec[i] != MaskVec[0]) AllSame = false; + } + if (Identity && NElts) + return N1; + + // Shuffling a constant splat doesn't change the result. + if (N2Undef) { + SDValue V = N1; + + // Look through any bitcasts. We check that these don't change the number + // (and size) of elements and just changes their types. + while (V.getOpcode() == ISD::BITCAST) + V = V->getOperand(0); + + // A splat should always show up as a build vector node. + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + // If this is a splat of an undef, shuffling it is also undef. + if (Splat && Splat.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + bool SameNumElts = + V.getValueType().getVectorNumElements() == VT.getVectorNumElements(); + + // We only have a splat which can skip shuffles if there is a splatted + // value and no undef lanes rearranged by the shuffle. + if (Splat && UndefElements.none()) { + // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the + // number of elements match or the value splatted is a zero constant. + if (SameNumElts) + return N1; + if (auto *C = dyn_cast<ConstantSDNode>(Splat)) + if (C->isNullValue()) + return N1; + } + + // If the shuffle itself creates a splat, build the vector directly. + if (AllSame && SameNumElts) { + const SDValue &Splatted = BV->getOperand(MaskVec[0]); + SmallVector<SDValue, 8> Ops(NElts, Splatted); + + EVT BuildVT = BV->getValueType(0); + SDValue NewBV = getNode(ISD::BUILD_VECTOR, dl, BuildVT, Ops); + + // We may have jumped through bitcasts, so the type of the + // BUILD_VECTOR may not match the type of the shuffle. + if (BuildVT != VT) + NewBV = getNode(ISD::BITCAST, dl, VT, NewBV); + return NewBV; + } + } + } + + FoldingSetNodeID ID; + SDValue Ops[2] = { N1, N2 }; + AddNodeIDNode(ID, ISD::VECTOR_SHUFFLE, getVTList(VT), Ops); + for (unsigned i = 0; i != NElts; ++i) + ID.AddInteger(MaskVec[i]); + + void* IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + return SDValue(E, 0); + + // Allocate the mask array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the NodeAllocator is released. 
+ int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); + memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); + + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getCommutedVectorShuffle(const ShuffleVectorSDNode &SV) { + MVT VT = SV.getSimpleValueType(0); + SmallVector<int, 8> MaskVec(SV.getMask().begin(), SV.getMask().end()); + ShuffleVectorSDNode::commuteMask(MaskVec); + + SDValue Op0 = SV.getOperand(0); + SDValue Op1 = SV.getOperand(1); + return getVectorShuffle(VT, SDLoc(&SV), Op1, Op0, &MaskVec[0]); +} + +SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, + SDValue Val, SDValue DTy, + SDValue STy, SDValue Rnd, SDValue Sat, + ISD::CvtCode Code) { + // If the src and dest types are the same and the conversion is between + // integer types of the same sign or two floats, no conversion is necessary. + if (DTy == STy && + (Code == ISD::CVT_UU || Code == ISD::CVT_SS || Code == ISD::CVT_FF)) + return Val; + + FoldingSetNodeID ID; + SDValue Ops[] = { Val, DTy, STy, Rnd, Sat }; + AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), Ops); + void* IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + return SDValue(E, 0); + + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, Code); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::Register, getVTList(VT), None); + ID.AddInteger(RegNo); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::RegisterMask, getVTList(MVT::Untyped), None); + ID.AddPointer(RegMask); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) RegisterMaskSDNode(RegMask); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { + FoldingSetNodeID ID; + SDValue Ops[] = { Root }; + AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), Ops); + ID.AddPointer(Label); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + + +SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT, + int64_t Offset, + bool isTarget, + unsigned char TargetFlags) { + unsigned Opc = isTarget ? 
ISD::TargetBlockAddress : ISD::BlockAddress; + + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, getVTList(VT), None); + ID.AddPointer(BA); + ID.AddInteger(Offset); + ID.AddInteger(TargetFlags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, Offset, + TargetFlags); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getSrcValue(const Value *V) { + assert((!V || V->getType()->isPointerTy()) && + "SrcValue is not a pointer?"); + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None); + ID.AddPointer(V); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +/// getMDNode - Return an MDNodeSDNode which holds an MDNode. +SDValue SelectionDAG::getMDNode(const MDNode *MD) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), None); + ID.AddPointer(MD); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) MDNodeSDNode(MD); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getBitcast(EVT VT, SDValue V) { + if (VT == V.getValueType()) + return V; + + return getNode(ISD::BITCAST, SDLoc(V), VT, V); +} + +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. +SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), Ops); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +/// getShiftAmountOperand - Return the specified value casted to +/// the target's desired shift amount type. 
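/// For example, if the target's shift-amount type for LHSTy is i8, an i32
/// amount is truncated (and a narrower one zero-extended) to i8; vector
/// shift amounts are returned unchanged.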
+SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { + EVT OpTy = Op.getValueType(); + EVT ShTy = TLI->getShiftAmountTy(LHSTy, getDataLayout()); + if (OpTy == ShTy || OpTy.isVector()) return Op; + + return getZExtOrTrunc(Op, SDLoc(Op), ShTy); +} + +SDValue SelectionDAG::expandVAArg(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + const Value *V = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); + EVT VT = Node->getValueType(0); + SDValue Tmp1 = Node->getOperand(0); + SDValue Tmp2 = Node->getOperand(1); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = + getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(Align - 1, dl, VAList.getValueType())); + + VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList, + getConstant(-(int64_t)Align, dl, VAList.getValueType())); + } + + // Increment the pointer, VAList, to the next vaarg + Tmp1 = getNode(ISD::ADD, dl, VAList.getValueType(), VAList, + getConstant(getDataLayout().getTypeAllocSize( + VT.getTypeForEVT(*getContext())), + dl, VAList.getValueType())); + // Store the incremented VAList to the legalized pointer + Tmp1 = getStore(VAListLoad.getValue(1), dl, Tmp1, Tmp2, + MachinePointerInfo(V), false, false, 0); + // Load the actual argument out of the pointer VAList + return getLoad(VT, dl, Tmp1, VAList, MachinePointerInfo(), + false, false, false, 0); +} + +SDValue SelectionDAG::expandVACopy(SDNode *Node) { + SDLoc dl(Node); + const TargetLowering &TLI = getTargetLoweringInfo(); + // This defaults to loading a pointer from the input and storing it to the + // output, returning the chain. + const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue(); + const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue(); + SDValue Tmp1 = getLoad(TLI.getPointerTy(getDataLayout()), dl, + Node->getOperand(0), Node->getOperand(2), + MachinePointerInfo(VS), false, false, false, 0); + return getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), + MachinePointerInfo(VD), false, false, 0); +} + +/// CreateStackTemporary - Create a stack temporary, suitable for holding the +/// specified value type. +SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + unsigned ByteSize = VT.getStoreSize(); + Type *Ty = VT.getTypeForEVT(*getContext()); + unsigned StackAlign = + std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign); + + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); +} + +/// CreateStackTemporary - Create a stack temporary suitable for holding +/// either of the specified value types. 
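/// The slot's byte size and preferred alignment are each taken as the
/// maximum over the two types, so the temporary can hold either one.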
+SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { + unsigned Bytes = std::max(VT1.getStoreSize(), VT2.getStoreSize()); + Type *Ty1 = VT1.getTypeForEVT(*getContext()); + Type *Ty2 = VT2.getTypeForEVT(*getContext()); + const DataLayout &DL = getDataLayout(); + unsigned Align = + std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2)); + + MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); + int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); + return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout())); +} + +SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, + SDValue N2, ISD::CondCode Cond, SDLoc dl) { + // These setcc operations always fold. + switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return getConstant(0, dl, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = + TLI->getBooleanContents(N1->getValueType(0)); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, + VT); + } + + case ISD::SETOEQ: + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETONE: + case ISD::SETO: + case ISD::SETUO: + case ISD::SETUEQ: + case ISD::SETUNE: + assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!"); + break; + } + + if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) { + const APInt &C2 = N2C->getAPIntValue(); + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { + const APInt &C1 = N1C->getAPIntValue(); + + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: return getConstant(C1 == C2, dl, VT); + case ISD::SETNE: return getConstant(C1 != C2, dl, VT); + case ISD::SETULT: return getConstant(C1.ult(C2), dl, VT); + case ISD::SETUGT: return getConstant(C1.ugt(C2), dl, VT); + case ISD::SETULE: return getConstant(C1.ule(C2), dl, VT); + case ISD::SETUGE: return getConstant(C1.uge(C2), dl, VT); + case ISD::SETLT: return getConstant(C1.slt(C2), dl, VT); + case ISD::SETGT: return getConstant(C1.sgt(C2), dl, VT); + case ISD::SETLE: return getConstant(C1.sle(C2), dl, VT); + case ISD::SETGE: return getConstant(C1.sge(C2), dl, VT); + } + } + } + if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) { + if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) { + APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF()); + switch (Cond) { + default: break; + case ISD::SETEQ: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT); + case ISD::SETNE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpLessThan, dl, VT); + case ISD::SETLT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT); + case ISD::SETGT: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT); + case ISD::SETLE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan || + R==APFloat::cmpEqual, dl, VT); + case ISD::SETGE: if (R==APFloat::cmpUnordered) + return getUNDEF(VT); + // fall through + case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpEqual, dl, VT); + case 
ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT); + case ISD::SETUO: return getConstant(R==APFloat::cmpUnordered, dl, VT); + case ISD::SETUEQ: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpEqual, dl, VT); + case ISD::SETUNE: return getConstant(R!=APFloat::cmpEqual, dl, VT); + case ISD::SETULT: return getConstant(R==APFloat::cmpUnordered || + R==APFloat::cmpLessThan, dl, VT); + case ISD::SETUGT: return getConstant(R==APFloat::cmpGreaterThan || + R==APFloat::cmpUnordered, dl, VT); + case ISD::SETULE: return getConstant(R!=APFloat::cmpGreaterThan, dl, VT); + case ISD::SETUGE: return getConstant(R!=APFloat::cmpLessThan, dl, VT); + } + } else { + // Ensure that the constant occurs on the RHS. + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TLI->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); + } + } + + // Could not fold it. + return SDValue(); +} + +/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We +/// use this predicate to simplify operations downstream. +bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const { + // This predicate is not safe for vector operations. + if (Op.getValueType().isVector()) + return false; + + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth); +} + +/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use +/// this predicate to simplify operations downstream. Mask is known to be zero +/// for bits that V cannot have. +bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, + unsigned Depth) const { + APInt KnownZero, KnownOne; + computeKnownBits(Op, KnownZero, KnownOne, Depth); + return (KnownZero & Mask) == Mask; +} + +/// Determine which bits of Op are known to be either zero or one and return +/// them in the KnownZero/KnownOne bitsets. +void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, + APInt &KnownOne, unsigned Depth) const { + unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); + + KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. + if (Depth == 6) + return; // Limit search depth. + + APInt KnownZero2, KnownOne2; + + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; + break; + case ISD::AND: + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. 
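// Worked example (editorial, 4-bit lanes): an LHS with KnownZero=1100/KnownOne=0001
// OR'd with an RHS with KnownZero=1010/KnownOne=0100 yields KnownZero=1000 and
// KnownOne=0101; only bit 3 is provably zero, while bits 0 and 2 are provably one.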
+ KnownOne |= KnownOne2; + break; + case ISD::XOR: { + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + KnownZero = KnownZeroOut; + break; + } + case ISD::MUL: { + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. + // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + KnownOne.clearAllBits(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + break; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the power of 2 known to + // be less than the denominator. + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clearAllBits(); + KnownZero2.clearAllBits(); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, + LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ); + break; + } + case ISD::SELECT: + computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1); + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; + case ISD::SETCC: + // If we know the result of a setcc has the top bits zero, use this info. 
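// For example, with ZeroOrOneBooleanContent an i32 SETCC result is either 0 or 1, so
// everything above the low bit is marked known-zero here.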
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; + case ISD::SHL: + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero <<= ShAmt; + KnownOne <<= ShAmt; + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt); + } + break; + case ISD::SRL: + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bits. + APInt SignBit = APInt::getSignBit(BitWidth); + SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask. + + if (KnownZero.intersects(SignBit)) { + KnownZero |= HighBits; // New bits are known zero. + } else if (KnownOne.intersects(SignBit)) { + KnownOne |= HighBits; // New bits are known one. + } + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + unsigned EBits = EVT.getScalarType().getSizeInBits(); + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - EBits); + + APInt InSignBit = APInt::getSignBit(EBits); + APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, EBits); + + // If the sign extended bits are demanded, we know that the sign + // bit is demanded. + InSignBit = InSignBit.zext(BitWidth); + if (NewBits.getBoolValue()) + InputDemandedBits |= InSignBit; + + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownOne &= InputDemandedBits; + KnownZero &= InputDemandedBits; + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. 
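// Concrete case (editorial): sign-extending the low 8 bits of an i32 whose bit 7 is
// known clear leaves bits 31..8 known zero; if bit 7 were known set instead, bits
// 31..8 would all become known one.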
+ if (KnownZero.intersects(InSignBit)) { // Input sign bit known clear + KnownZero |= NewBits; + KnownOne &= ~NewBits; + } else if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTPOP: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + KnownOne.clearAllBits(); + break; + } + case ISD::LOAD: { + LoadSDNode *LD = cast<LoadSDNode>(Op); + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { + EVT VT = LD->getMemoryVT(); + unsigned MemBits = VT.getScalarType().getSizeInBits(); + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); + } else if (const MDNode *Ranges = LD->getRanges()) { + if (LD->getExtensionType() == ISD::NON_EXTLOAD) + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero, KnownOne); + } + break; + } + case ISD::ZERO_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); + + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + + // Note if the sign bit is known to be zero or one. + bool SignBitKnownZero = KnownZero.isNegative(); + bool SignBitKnownOne = KnownOne.isNegative(); + + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit is known zero or one, the top bits match. + if (SignBitKnownZero) + KnownZero |= NewBits; + else if (SignBitKnownOne) + KnownOne |= NewBits; + break; + } + case ISD::ANY_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + KnownZero = KnownZero.trunc(InBits); + KnownOne = KnownOne.trunc(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + KnownZero = KnownZero.zext(InBits); + KnownOne = KnownOne.zext(InBits); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } + case ISD::AssertZext: { + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, VT.getSizeInBits()); + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + KnownZero |= (~InMask); + KnownOne &= (~KnownZero); + break; + } + case ISD::FGETSIGN: + // All bits are zero except the low bit. 
+ KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; + + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2); + } + } + } + } + // fall through + case ISD::ADD: + case ISD::ADDE: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1); + unsigned KnownZeroHigh = KnownZero2.countLeadingOnes(); + unsigned KnownZeroLow = KnownZero2.countTrailingOnes(); + + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + KnownZeroHigh = std::min(KnownZeroHigh, + KnownZero2.countLeadingOnes()); + KnownZeroLow = std::min(KnownZeroLow, + KnownZero2.countTrailingOnes()); + + if (Op.getOpcode() == ISD::ADD) { + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow); + if (KnownZeroHigh > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1); + break; + } + + // With ADDE, a carry bit may be added in, so we can only use this + // information if we know (at least) that the low two bits are clear. We + // then return to the caller that the low bit is unknown but that other bits + // are known zero. + if (KnownZeroLow >= 2) // ADDE + KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroLow); + break; + } + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue().abs(); + if (RA.isPowerOf2()) { + APInt LowBits = RA - 1; + computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); + + // The low bits of the first operand are unchanged by the srem. + KnownZero = KnownZero2 & LowBits; + KnownOne = KnownOne2 & LowBits; + + // If the first operand is non-negative or has all low bits zero, then + // the upper bits are all zero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero |= ~LowBits; + + // If the first operand is negative and not all low bits are zero, then + // the upper bits are all one. 
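// e.g. a dividend known negative with bit 0 known set gives X srem 8 in {-1,-3,-5,-7},
// where every bit above the low three is one.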
+ if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) + KnownOne |= ~LowBits; + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + } + } + break; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + const APInt &RA = Rem->getAPIntValue(); + if (RA.isPowerOf2()) { + APInt LowBits = (RA - 1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + + // The upper bits are all zero, the lower ones are unchanged. + KnownZero = KnownZero2 | ~LowBits; + KnownOne = KnownOne2 & LowBits; + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clearAllBits(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders); + break; + } + case ISD::EXTRACT_ELEMENT: { + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + const unsigned Index = + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + const unsigned BitWidth = Op.getValueType().getSizeInBits(); + + // Remove low part of known bits mask + KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth); + KnownOne = KnownOne.getHiBits(KnownOne.getBitWidth() - Index * BitWidth); + + // Remove high part of known bit mask + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + break; + } + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: { + APInt Op0Zero, Op0One; + APInt Op1Zero, Op1One; + computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth); + computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth); + + KnownZero = Op0Zero & Op1Zero; + KnownOne = Op0One & Op1One; + break; + } + case ISD::FrameIndex: + case ISD::TargetFrameIndex: + if (unsigned Align = InferPtrAlignment(Op)) { + // The low bits are known zero if the pointer is aligned. + KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align)); + break; + } + break; + + default: + if (Op.getOpcode() < ISD::BUILTIN_OP_END) + break; + // Fallthrough + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_W_CHAIN: + case ISD::INTRINSIC_VOID: + // Allow the target to implement this method for its nodes. + TLI->computeKnownBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + break; + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); +} + +/// ComputeNumSignBits - Return the number of times the sign bit of the +/// register is replicated into the other bits. We know that at least 1 bit +/// is always equal to the sign bit (itself), but other cases can give us +/// information. For example, immediately after an "SRA X, 2", we know that +/// the top 3 bits are all equal to each other, so we return 3. +unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + EVT VT = Op.getValueType(); + assert(VT.isInteger() && "Invalid VT!"); + unsigned VTBits = VT.getScalarType().getSizeInBits(); + unsigned Tmp, Tmp2; + unsigned FirstAnswer = 1; + + if (Depth == 6) + return 1; // Limit search depth. 
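// Illustrative sketch (editorial addition, plain C++ rather than the SelectionDAG
// API): what "number of sign bits" means for a 32-bit value, computed by scanning
// down from the top bit. The switch below derives the same quantity per opcode,
// e.g. an SRA by C adds C extra copies of the sign bit, capped at the bit width.
#include <cstdint>
static unsigned signBitCopies(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  unsigned Sign = U >> 31;   // the sign bit itself
  unsigned N = 1;            // it always counts as one copy
  for (int Bit = 30; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;                  // signBitCopies(-1) == 32, signBitCopies(1) == 31
}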
+ + switch (Op.getOpcode()) { + default: break; + case ISD::AssertSext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::AssertZext: + Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getSizeInBits(); + return VTBits-Tmp; + + case ISD::Constant: { + const APInt &Val = cast<ConstantSDNode>(Op)->getAPIntValue(); + return Val.getNumSignBits(); + } + + case ISD::SIGN_EXTEND: + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; + + case ISD::SIGN_EXTEND_INREG: + // Max of the input and what this extends. + Tmp = + cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits(); + Tmp = VTBits-Tmp+1; + + Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); + return std::max(Tmp, Tmp2); + + case ISD::SRA: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + // SRA X, C -> adds C sign bits. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + Tmp += C->getZExtValue(); + if (Tmp > VTBits) Tmp = VTBits; + } + return Tmp; + case ISD::SHL: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + // shl destroys sign bits. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (C->getZExtValue() >= VTBits || // Bad shift. + C->getZExtValue() >= Tmp) break; // Shifted all sign bits out. + return Tmp - C->getZExtValue(); + } + break; + case ISD::AND: + case ISD::OR: + case ISD::XOR: // NOT is handled here. + // Logical binary ops preserve the number of sign bits at the worst. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp != 1) { + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + FirstAnswer = std::min(Tmp, Tmp2); + // We computed what we know about the sign bits as our first + // answer. Now proceed to the generic code that uses + // computeKnownBits, and pick whichever answer is better. + } + break; + + case ISD::SELECT: + Tmp = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(2), Depth+1); + return std::min(Tmp, Tmp2); + case ISD::SELECT_CC: + Tmp = ComputeNumSignBits(Op.getOperand(2), Depth+1); + if (Tmp == 1) return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(3), Depth+1); + return std::min(Tmp, Tmp2); + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1); + if (Tmp == 1) + return 1; // Early out. + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth + 1); + return std::min(Tmp, Tmp2); + case ISD::SADDO: + case ISD::UADDO: + case ISD::SSUBO: + case ISD::USUBO: + case ISD::SMULO: + case ISD::UMULO: + if (Op.getResNo() != 1) + break; + // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::SETCC: + // If setcc returns 0/-1, all bits are sign bits. 
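// That is, with ZeroOrNegativeOneBooleanContent an i32 SETCC result is 0x00000000 or
// 0xFFFFFFFF, so all 32 bits are copies of the sign bit.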
+ if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; + case ISD::ROTL: + case ISD::ROTR: + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned RotAmt = C->getZExtValue() & (VTBits-1); + + // Handle rotate right by N like a rotate left by 32-N. + if (Op.getOpcode() == ISD::ROTR) + RotAmt = (VTBits-RotAmt) & (VTBits-1); + + // If we aren't rotating out all of the known-in sign bits, return the + // number that are left. This handles rotl(sext(x), 1) for example. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp > RotAmt+1) return Tmp-RotAmt; + } + break; + case ISD::ADD: + // Add can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + + // Special case decrementing a value (ADD X, -1): + if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + if (CRHS->isAllOnesValue()) { + APInt KnownZero, KnownOne; + computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); + + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + return VTBits; + + // If we are subtracting one from a positive number, there is no carry + // out of the result. + if (KnownZero.isNegative()) + return Tmp; + } + + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + return std::min(Tmp, Tmp2)-1; + + case ISD::SUB: + Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); + if (Tmp2 == 1) return 1; + + // Handle NEG. + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) + if (CLHS->isNullValue()) { + APInt KnownZero, KnownOne; + computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1); + // If the input is known to be 0 or 1, the output is 0/-1, which is all + // sign bits set. + if ((KnownZero | APInt(VTBits, 1)).isAllOnesValue()) + return VTBits; + + // If the input is known to be positive (the sign bit is known clear), + // the output of the NEG has the same number of sign bits as the input. + if (KnownZero.isNegative()) + return Tmp2; + + // Otherwise, we treat this like a SUB. + } + + // Sub can have at most one carry bit. Thus we know that the output + // is, at worst, one more bit than the inputs. + Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); + if (Tmp == 1) return 1; // Early out. + return std::min(Tmp, Tmp2)-1; + case ISD::TRUNCATE: + // FIXME: it's tricky to do anything useful for this, but it is an important + // case for targets like X86. + break; + case ISD::EXTRACT_ELEMENT: { + const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1); + const int BitWidth = Op.getValueType().getSizeInBits(); + const int Items = + Op.getOperand(0).getValueType().getSizeInBits() / BitWidth; + + // Get reverse index (starting from 1), Op1 value indexes elements from + // little end. Sign starts at big end. + const int rIndex = Items - 1 - + cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); + + // If the sign portion ends in our element the subtraction gives correct + // result. Otherwise it gives either negative or > bitwidth result + return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0); + } + } + + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. 
EXTLOAD case will fallthrough. + if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } + } + } + + // Allow the target to implement this method for its nodes. + if (Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) { + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, *this, Depth); + if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); + } + + // Finally, if we can prove that the top bits of the result are 0's or 1's, + // use this information. + APInt KnownZero, KnownOne; + computeKnownBits(Op, KnownZero, KnownOne, Depth); + + APInt Mask; + if (KnownZero.isNegative()) { // sign bit is 0 + Mask = KnownZero; + } else if (KnownOne.isNegative()) { // sign bit is 1; + Mask = KnownOne; + } else { + // Nothing known. + return FirstAnswer; + } + + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine + // the number of identical bits in the top of the input value. + Mask = ~Mask; + Mask <<= Mask.getBitWidth()-VTBits; + // Return # leading zeros. We use 'min' here in case Val was zero before + // shifting. We don't want to return '64' as for an i32 "0". + return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros())); +} + +/// isBaseWithConstantOffset - Return true if the specified operand is an +/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an +/// ISD::OR with a ConstantSDNode that is guaranteed to have the same +/// semantics as an ADD. This handles the equivalence: +/// X|Cst == X+Cst iff X&Cst = 0. +bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { + if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || + !isa<ConstantSDNode>(Op.getOperand(1))) + return false; + + if (Op.getOpcode() == ISD::OR && + !MaskedValueIsZero(Op.getOperand(0), + cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue())) + return false; + + return true; +} + + +bool SelectionDAG::isKnownNeverNaN(SDValue Op) const { + // If we're told that NaNs won't happen, assume they won't. + if (getTarget().Options.NoNaNsFPMath) + return true; + + // If the value is a constant, we can obviously see if it is a NaN or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->getValueAPF().isNaN(); + + // TODO: Recognize more cases here. + + return false; +} + +bool SelectionDAG::isKnownNeverZero(SDValue Op) const { + // If the value is a constant, we can obviously see if it is a zero or not. + if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op)) + return !C->isZero(); + + // TODO: Recognize more cases here. + switch (Op.getOpcode()) { + default: break; + case ISD::OR: + if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) + return !C->isNullValue(); + break; + } + + return false; +} + +bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { + // Check the obvious case. + if (A == B) return true; + + // For for negative and positive zero. 
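// (+0.0 and -0.0 are distinct constant nodes but equal as values; that is what the
// zero check below accounts for.)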
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A)) + if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B)) + if (CA->isZero() && CB->isZero()) return true; + + // Otherwise they may not be equal. + return false; +} + +bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const { + assert(A.getValueType() == B.getValueType() && + "Values must have the same type"); + APInt AZero, AOne; + APInt BZero, BOne; + computeKnownBits(A, AZero, AOne); + computeKnownBits(B, BZero, BOne); + return (AZero | BZero).isAllOnesValue(); +} + +static SDValue FoldCONCAT_VECTORS(SDLoc DL, EVT VT, ArrayRef<SDValue> Ops, + llvm::SelectionDAG &DAG) { + if (Ops.size() == 1) + return Ops[0]; + + // Concat of UNDEFs is UNDEF. + if (std::all_of(Ops.begin(), Ops.end(), + [](SDValue Op) { return Op.isUndef(); })) + return DAG.getUNDEF(VT); + + // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified + // to one big BUILD_VECTOR. + // FIXME: Add support for UNDEF and SCALAR_TO_VECTOR as well. + if (!std::all_of(Ops.begin(), Ops.end(), [](SDValue Op) { + return Op.getOpcode() == ISD::BUILD_VECTOR; + })) + return SDValue(); + + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 16> Elts; + for (SDValue Op : Ops) + Elts.append(Op->op_begin(), Op->op_end()); + + // BUILD_VECTOR requires all inputs to be of the same type, find the + // maximum type and extend them all. + for (SDValue Op : Elts) + SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); + + if (SVT.bitsGT(VT.getScalarType())) + for (SDValue &Op : Elts) + Op = DAG.getTargetLoweringInfo().isZExtFree(Op.getValueType(), SVT) + ? DAG.getZExtOrTrunc(Op, DL, SVT) + : DAG.getSExtOrTrunc(Op, DL, SVT); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts); +} + +/// getNode - Gets or creates the specified node. +/// +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, getVTList(VT), None); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); + CSEMap.InsertNode(N, IP); + + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, + EVT VT, SDValue Operand) { + // Constant fold unary operations with an integer constant operand. Even + // opaque constant will be folded, because the folding of unary operations + // doesn't create new constants with different values. Nevertheless, the + // opaque flag is preserved during folding to prevent future folding with + // other constants. 
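// Illustrative sketch (editorial addition, plain C++ rather than APInt): the folds
// below simply evaluate the unary operator on the constant's bit pattern, as these
// standalone helpers do for BSWAP and CTPOP on a 32-bit value.
#include <cstdint>
static uint32_t bswap32(uint32_t V) {
  return (V >> 24) | ((V >> 8) & 0x0000FF00u) |
         ((V << 8) & 0x00FF0000u) | (V << 24);   // bswap32(0x11223344u) == 0x44332211u
}
static unsigned popcount32(uint32_t V) {
  unsigned N = 0;
  for (; V != 0; V &= V - 1)   // clear the lowest set bit once per iteration
    ++N;
  return N;                    // popcount32(0xF0u) == 4
}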
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Operand)) { + const APInt &Val = C->getAPIntValue(); + switch (Opcode) { + default: break; + case ISD::SIGN_EXTEND: + return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::TRUNCATE: + return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), DL, VT, + C->isTargetOpcode(), C->isOpaque()); + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: { + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); + (void)apf.convertFromAPInt(Val, + Opcode==ISD::SINT_TO_FP, + APFloat::rmNearestTiesToEven); + return getConstantFP(apf, DL, VT); + } + case ISD::BITCAST: + if (VT == MVT::f16 && C->getValueType(0) == MVT::i16) + return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); + if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); + break; + case ISD::BSWAP: + return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTPOP: + return getConstant(Val.countPopulation(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + return getConstant(Val.countLeadingZeros(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + return getConstant(Val.countTrailingZeros(), DL, VT, C->isTargetOpcode(), + C->isOpaque()); + } + } + + // Constant fold unary operations with a floating point constant operand. + if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Operand)) { + APFloat V = C->getValueAPF(); // make copy + switch (Opcode) { + case ISD::FNEG: + V.changeSign(); + return getConstantFP(V, DL, VT); + case ISD::FABS: + V.clearSign(); + return getConstantFP(V, DL, VT); + case ISD::FCEIL: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardPositive); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FTRUNC: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardZero); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FFLOOR: { + APFloat::opStatus fs = V.roundToIntegral(APFloat::rmTowardNegative); + if (fs == APFloat::opOK || fs == APFloat::opInexact) + return getConstantFP(V, DL, VT); + break; + } + case ISD::FP_EXTEND: { + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, DL, VT); + } + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: { + integerPart x[2]; + bool ignored; + static_assert(integerPartWidth >= 64, "APFloat parts too small!"); + // FIXME need to be more flexible about rounding mode. 
+ APFloat::opStatus s = V.convertToInteger(x, VT.getSizeInBits(), + Opcode==ISD::FP_TO_SINT, + APFloat::rmTowardZero, &ignored); + if (s==APFloat::opInvalidOp) // inexact is OK, in fact usual + break; + APInt api(VT.getSizeInBits(), x); + return getConstant(api, DL, VT); + } + case ISD::BITCAST: + if (VT == MVT::i16 && C->getValueType(0) == MVT::f16) + return getConstant((uint16_t)V.bitcastToAPInt().getZExtValue(), DL, VT); + else if (VT == MVT::i32 && C->getValueType(0) == MVT::f32) + return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), DL, VT); + else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64) + return getConstant(V.bitcastToAPInt().getZExtValue(), DL, VT); + break; + } + } + + // Constant fold unary operations with a vector integer or float operand. + if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Operand)) { + if (BV->isConstant()) { + switch (Opcode) { + default: + // FIXME: Entirely reasonable to perform folding of other unary + // operations here as the need arises. + break; + case ISD::FNEG: + case ISD::FABS: + case ISD::FCEIL: + case ISD::FTRUNC: + case ISD::FFLOOR: + case ISD::FP_EXTEND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::SINT_TO_FP: + case ISD::BSWAP: + case ISD::CTLZ: + case ISD::CTLZ_ZERO_UNDEF: + case ISD::CTTZ: + case ISD::CTTZ_ZERO_UNDEF: + case ISD::CTPOP: { + SDValue Ops = { Operand }; + if (SDValue Fold = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return Fold; + } + } + } + } + + unsigned OpOpcode = Operand.getNode()->getOpcode(); + switch (Opcode) { + case ISD::TokenFactor: + case ISD::MERGE_VALUES: + case ISD::CONCAT_VECTORS: + return Operand; // Factor, merge or concat of one node? No need. + case ISD::FP_ROUND: llvm_unreachable("Invalid method to make FP_ROUND node"); + case ISD::FP_EXTEND: + assert(VT.isFloatingPoint() && + Operand.getValueType().isFloatingPoint() && "Invalid FP cast!"); + if (Operand.getValueType() == VT) return Operand; // noop conversion. + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid fpext node, dst < src!"); + if (Operand.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SIGN_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid SIGN_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid sext node, dst < src!"); + if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. 
+ return getConstant(0, DL, VT); + break; + case ISD::ZERO_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ZERO_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid zext node, dst < src!"); + if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) + return getNode(ISD::ZERO_EXTEND, DL, VT, + Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. + return getConstant(0, DL, VT); + break; + case ISD::ANY_EXTEND: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid ANY_EXTEND!"); + if (Operand.getValueType() == VT) return Operand; // noop extension + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsLT(VT) && + "Invalid anyext node, dst < src!"); + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) + // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + + // (ext (trunx x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getNode()->getOperand(0); + if (OpOp.getValueType() == VT) + return OpOp; + } + break; + case ISD::TRUNCATE: + assert(VT.isInteger() && Operand.getValueType().isInteger() && + "Invalid TRUNCATE!"); + if (Operand.getValueType() == VT) return Operand; // noop truncate + assert((!VT.isVector() || + VT.getVectorNumElements() == + Operand.getValueType().getVectorNumElements()) && + "Vector element count mismatch!"); + assert(Operand.getValueType().bitsGT(VT) && + "Invalid truncate node, src < dst!"); + if (OpOpcode == ISD::TRUNCATE) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) { + // If the source is smaller than the dest, we still need an extend. + if (Operand.getNode()->getOperand(0).getValueType().getScalarType() + .bitsLT(VT.getScalarType())) + return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT)) + return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0)); + return Operand.getNode()->getOperand(0); + } + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::BSWAP: + assert(VT.isInteger() && VT == Operand.getValueType() && + "Invalid BSWAP!"); + assert((VT.getScalarSizeInBits() % 16 == 0) && + "BSWAP types must be a multiple of 16 bits!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::BITCAST: + // Basic sanity checking. + assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits() + && "Cannot BITCAST between types of different sizes!"); + if (VT == Operand.getValueType()) return Operand; // noop conversion. 
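// Illustrative sketch (editorial addition): a BITCAST reinterprets the same bits at
// the same width and never converts the value; for ordinary C++ types the equivalent
// is a memcpy between same-sized objects.
#include <cstdint>
#include <cstring>
static float bitcastToF32(uint32_t Bits) {
  static_assert(sizeof(float) == sizeof(uint32_t), "i32 and f32 must be the same size");
  float F;
  std::memcpy(&F, &Bits, sizeof(F));   // reinterpretation, no numeric conversion
  return F;                            // bitcastToF32(0x3f800000u) == 1.0f
}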
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x) + return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0)); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + break; + case ISD::SCALAR_TO_VECTOR: + assert(VT.isVector() && !Operand.getValueType().isVector() && + (VT.getVectorElementType() == Operand.getValueType() || + (VT.getVectorElementType().isInteger() && + Operand.getValueType().isInteger() && + VT.getVectorElementType().bitsLE(Operand.getValueType()))) && + "Illegal SCALAR_TO_VECTOR node!"); + if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); + // scalar_to_vector(extract_vector_elt V, 0) -> V, top bits are undefined. + if (OpOpcode == ISD::EXTRACT_VECTOR_ELT && + isa<ConstantSDNode>(Operand.getOperand(1)) && + Operand.getConstantOperandVal(1) == 0 && + Operand.getOperand(0).getValueType() == VT) + return Operand.getOperand(0); + break; + case ISD::FNEG: + // -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0 + if (getTarget().Options.UnsafeFPMath && OpOpcode == ISD::FSUB) + // FIXME: FNEG has no fast-math-flags to propagate; use the FSUB's flags? + return getNode(ISD::FSUB, DL, VT, Operand.getNode()->getOperand(1), + Operand.getNode()->getOperand(0), + &cast<BinaryWithFlagsSDNode>(Operand.getNode())->Flags); + if (OpOpcode == ISD::FNEG) // --X -> X + return Operand.getNode()->getOperand(0); + break; + case ISD::FABS: + if (OpOpcode == ISD::FNEG) // abs(-X) -> abs(X) + return getNode(ISD::FABS, DL, VT, Operand.getNode()->getOperand(0)); + break; + } + + SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { // Don't CSE flag producing nodes + FoldingSetNodeID ID; + SDValue Ops[1] = { Operand }; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); + } + + InsertNode(N); + return SDValue(N, 0); +} + +static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1, + const APInt &C2) { + switch (Opcode) { + case ISD::ADD: return std::make_pair(C1 + C2, true); + case ISD::SUB: return std::make_pair(C1 - C2, true); + case ISD::MUL: return std::make_pair(C1 * C2, true); + case ISD::AND: return std::make_pair(C1 & C2, true); + case ISD::OR: return std::make_pair(C1 | C2, true); + case ISD::XOR: return std::make_pair(C1 ^ C2, true); + case ISD::SHL: return std::make_pair(C1 << C2, true); + case ISD::SRL: return std::make_pair(C1.lshr(C2), true); + case ISD::SRA: return std::make_pair(C1.ashr(C2), true); + case ISD::ROTL: return std::make_pair(C1.rotl(C2), true); + case ISD::ROTR: return std::make_pair(C1.rotr(C2), true); + case ISD::SMIN: return std::make_pair(C1.sle(C2) ? C1 : C2, true); + case ISD::SMAX: return std::make_pair(C1.sge(C2) ? C1 : C2, true); + case ISD::UMIN: return std::make_pair(C1.ule(C2) ? C1 : C2, true); + case ISD::UMAX: return std::make_pair(C1.uge(C2) ? 
C1 : C2, true); + case ISD::UDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.udiv(C2), true); + case ISD::UREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.urem(C2), true); + case ISD::SDIV: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.sdiv(C2), true); + case ISD::SREM: + if (!C2.getBoolValue()) + break; + return std::make_pair(C1.srem(C2), true); + } + return std::make_pair(APInt(1, 0), false); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + const ConstantSDNode *Cst1, + const ConstantSDNode *Cst2) { + if (Cst1->isOpaque() || Cst2->isOpaque()) + return SDValue(); + + std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(), + Cst2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + return getConstant(Folded.first, DL, VT); +} + +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, SDLoc DL, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // Handle the case of two scalars. + if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) { + if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) { + if (SDValue Folded = + FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2)) { + if (!VT.isVector()) + return Folded; + SmallVector<SDValue, 4> Outputs; + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + // Build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); + } else { + return SDValue(); + } + } + } + + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. + BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1); + BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + EVT SVT = VT.getScalarType(); + SmallVector<SDValue, 4> Outputs; + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + if (V1->isOpaque() || V2->isOpaque()) + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + // Fold one vector element. + std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(), + V2->getAPIntValue()); + if (!Folded.second) + return SDValue(); + Outputs.push_back(getConstant(Folded.first, DL, SVT)); + } + + assert(VT.getVectorNumElements() == Outputs.size() && + "Vector size mismatch!"); + + // We may have a vector type but a scalar result. Create a splat. + Outputs.resize(VT.getVectorNumElements(), Outputs.back()); + + // Build a big vector out of the scalar elements we generated. 
+ return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs); +} + +SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, + EVT VT, + ArrayRef<SDValue> Ops, + const SDNodeFlags *Flags) { + // If the opcode is a target-specific ISD node, there's nothing we can + // do here and the operand rules may not line up with the below, so + // bail early. + if (Opcode >= ISD::BUILTIN_OP_END) + return SDValue(); + + // We can only fold vectors - maybe merge with FoldConstantArithmetic someday? + if (!VT.isVector()) + return SDValue(); + + unsigned NumElts = VT.getVectorNumElements(); + + auto IsScalarOrSameVectorSize = [&](const SDValue &Op) { + return !Op.getValueType().isVector() || + Op.getValueType().getVectorNumElements() == NumElts; + }; + + auto IsConstantBuildVectorOrUndef = [&](const SDValue &Op) { + BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op); + return (Op.getOpcode() == ISD::UNDEF) || + (Op.getOpcode() == ISD::CONDCODE) || (BV && BV->isConstant()); + }; + + // All operands must be vector types with the same number of elements as + // the result type and must be either UNDEF or a build vector of constant + // or UNDEF scalars. + if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) || + !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) + return SDValue(); + + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + + // Find legal integer scalar type for constant promotion and + // ensure that its scalar size is at least as large as source. + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); + if (LegalSVT.bitsLT(SVT)) + return SDValue(); + } + + // Constant fold each scalar lane separately. + SmallVector<SDValue, 4> ScalarResults; + for (unsigned i = 0; i != NumElts; i++) { + SmallVector<SDValue, 4> ScalarOps; + for (SDValue Op : Ops) { + EVT InSVT = Op.getValueType().getScalarType(); + BuildVectorSDNode *InBV = dyn_cast<BuildVectorSDNode>(Op); + if (!InBV) { + // We've checked that this is UNDEF or a constant of some kind. + if (Op.isUndef()) + ScalarOps.push_back(getUNDEF(InSVT)); + else + ScalarOps.push_back(Op); + continue; + } + + SDValue ScalarOp = InBV->getOperand(i); + EVT ScalarVT = ScalarOp.getValueType(); + + // Build vector (integer) scalar operands may need implicit + // truncation - do this before constant folding. + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + + ScalarOps.push_back(ScalarOp); + } + + // Constant fold the scalar operands. + SDValue ScalarResult = getNode(Opcode, DL, SVT, ScalarOps, Flags); + + // Legalize the (integer) scalar constant if necessary. + if (LegalSVT != SVT) + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); + + // Scalar folding only succeeded if the result is a constant or UNDEF. 
+ if (ScalarResult.getOpcode() != ISD::UNDEF && + ScalarResult.getOpcode() != ISD::Constant && + ScalarResult.getOpcode() != ISD::ConstantFP) + return SDValue(); + ScalarResults.push_back(ScalarResult); + } + + assert(ScalarResults.size() == NumElts && + "Unexpected number of scalar results for BUILD_VECTOR"); + return getNode(ISD::BUILD_VECTOR, DL, VT, ScalarResults); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, + SDValue N2, const SDNodeFlags *Flags) { + ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); + ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + + // Canonicalize constant to RHS if commutative. + if (isCommutativeBinOp(Opcode)) { + if (N1C && !N2C) { + std::swap(N1C, N2C); + std::swap(N1, N2); + } else if (N1CFP && !N2CFP) { + std::swap(N1CFP, N2CFP); + std::swap(N1, N2); + } + } + + switch (Opcode) { + default: break; + case ISD::TokenFactor: + assert(VT == MVT::Other && N1.getValueType() == MVT::Other && + N2.getValueType() == MVT::Other && "Invalid token factor!"); + // Fold trivial token factors. + if (N1.getOpcode() == ISD::EntryToken) return N2; + if (N2.getOpcode() == ISD::EntryToken) return N1; + if (N1 == N2) return N1; + break; + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. + SDValue Ops[] = {N1, N2}; + if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::AND: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X & 0) -> 0. This commonly occurs when legalizing i64 values, so it's + // worth handling here. + if (N2C && N2C->isNullValue()) + return N2; + if (N2C && N2C->isAllOnesValue()) // X & -1 -> X + return N1; + break; + case ISD::OR: + case ISD::XOR: + case ISD::ADD: + case ISD::SUB: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + // (X ^|+- 0) -> X. This commonly occurs when legalizing i64 values, so + // it's worth handling here. 
+ if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::UDIV: + case ISD::UREM: + case ISD::MULHU: + case ISD::MULHS: + case ISD::MUL: + case ISD::SDIV: + case ISD::SREM: + case ISD::SMIN: + case ISD::SMAX: + case ISD::UMIN: + case ISD::UMAX: + assert(VT.isInteger() && "This operator does not apply to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (getTarget().Options.UnsafeFPMath) { + if (Opcode == ISD::FADD) { + // x+0 --> x + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FSUB) { + // x-0 --> x + if (N2CFP && N2CFP->getValueAPF().isZero()) + return N1; + } else if (Opcode == ISD::FMUL) { + // x*0 --> 0 + if (N2CFP && N2CFP->isZero()) + return N2; + // x*1 --> x + if (N2CFP && N2CFP->isExactlyValue(1.0)) + return N1; + } + } + assert(VT.isFloatingPoint() && "This operator only applies to FP types!"); + assert(N1.getValueType() == N2.getValueType() && + N1.getValueType() == VT && "Binary operator types must match!"); + break; + case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match. + assert(N1.getValueType() == VT && + N1.getValueType().isFloatingPoint() && + N2.getValueType().isFloatingPoint() && + "Invalid FCOPYSIGN!"); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + assert(VT == N1.getValueType() && + "Shift operators return type must be the same as their first arg"); + assert(VT.isInteger() && N2.getValueType().isInteger() && + "Shifts only work on integers"); + assert((!VT.isVector() || VT == N2.getValueType()) && + "Vector shift amounts must be in the same as their first arg"); + // Verify that the shift amount VT is bit enough to hold valid shift + // amounts. This catches things like trying to shift an i1024 value by an + // i8, which is easy to fall into in generic code that uses + // TLI.getShiftAmount(). + assert(N2.getValueType().getSizeInBits() >= + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + "Invalid use of small shift amount with oversized value!"); + + // Always fold shifts of i1 values so the code generator doesn't need to + // handle them. Since we know the size of the shift has to be less than the + // size of the value, the shift/rotate count is guaranteed to be zero. + if (VT == MVT::i1) + return N1; + if (N2C && N2C->isNullValue()) + return N1; + break; + case ISD::FP_ROUND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg round!"); + assert(VT.isFloatingPoint() && EVT.isFloatingPoint() && + "Cannot FP_ROUND_INREG integer types"); + assert(EVT.isVector() == VT.isVector() && + "FP_ROUND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in FP_ROUND_INREG"); + assert(EVT.bitsLE(VT) && "Not rounding down!"); + (void)EVT; + if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding. + break; + } + case ISD::FP_ROUND: + assert(VT.isFloatingPoint() && + N1.getValueType().isFloatingPoint() && + VT.bitsLE(N1.getValueType()) && + N2C && "Invalid FP_ROUND!"); + if (N1.getValueType() == VT) return N1; // noop conversion. 
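// Editorial sketch: a non-noop FP_ROUND genuinely narrows the value (it is not a
// bitcast); the constant-operand case is folded with APFloat::convert further down
// in this function.
static float roundToF32(double D) {
  return static_cast<float>(D);   // e.g. roundToF32(0.1) stores 0x3DCCCCCD, not the double's bits
}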
+ break; + case ISD::AssertSext: + case ISD::AssertZext: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(!EVT.isVector() && + "AssertSExt/AssertZExt type should be the vector element type " + "rather than the vector type!"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (VT == EVT) return N1; // noop assertion. + break; + } + case ISD::SIGN_EXTEND_INREG: { + EVT EVT = cast<VTSDNode>(N2)->getVT(); + assert(VT == N1.getValueType() && "Not an inreg extend!"); + assert(VT.isInteger() && EVT.isInteger() && + "Cannot *_EXTEND_INREG FP types"); + assert(EVT.isVector() == VT.isVector() && + "SIGN_EXTEND_INREG type should be vector iff the operand " + "type is vector!"); + assert((!EVT.isVector() || + EVT.getVectorNumElements() == VT.getVectorNumElements()) && + "Vector element counts must match in SIGN_EXTEND_INREG"); + assert(EVT.bitsLE(VT) && "Not extending!"); + if (EVT == VT) return N1; // Not actually extending + + auto SignExtendInReg = [&](APInt Val) { + unsigned FromBits = EVT.getScalarType().getSizeInBits(); + Val <<= Val.getBitWidth() - FromBits; + Val = Val.ashr(Val.getBitWidth() - FromBits); + return getConstant(Val, DL, VT.getScalarType()); + }; + + if (N1C) { + APInt Val = N1C->getAPIntValue(); + return SignExtendInReg(Val); + } + if (ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) { + SmallVector<SDValue, 8> Ops; + for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) { + SDValue Op = N1.getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + Ops.push_back(getUNDEF(VT.getScalarType())); + continue; + } + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { + APInt Val = C->getAPIntValue(); + Val = Val.zextOrTrunc(VT.getScalarSizeInBits()); + Ops.push_back(SignExtendInReg(Val)); + continue; + } + break; + } + if (Ops.size() == VT.getVectorNumElements()) + return getNode(ISD::BUILD_VECTOR, DL, VT, Ops); + } + break; + } + case ISD::EXTRACT_VECTOR_ELT: + // EXTRACT_VECTOR_ELT of an UNDEF is an UNDEF. + if (N1.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of out-of-bounds element is an UNDEF + if (N2C && N2C->getZExtValue() >= N1.getValueType().getVectorNumElements()) + return getUNDEF(VT); + + // EXTRACT_VECTOR_ELT of CONCAT_VECTORS is often formed while lowering is + // expanding copies of large vectors from registers. + if (N2C && + N1.getOpcode() == ISD::CONCAT_VECTORS && + N1.getNumOperands() > 0) { + unsigned Factor = + N1.getOperand(0).getValueType().getVectorNumElements(); + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, + N1.getOperand(N2C->getZExtValue() / Factor), + getConstant(N2C->getZExtValue() % Factor, DL, + N2.getValueType())); + } + + // EXTRACT_VECTOR_ELT of BUILD_VECTOR is often formed while lowering is + // expanding large vector constants. + if (N2C && N1.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Elt = N1.getOperand(N2C->getZExtValue()); + + if (VT != Elt.getValueType()) + // If the vector element type is not legal, the BUILD_VECTOR operands + // are promoted and implicitly truncated, and the result implicitly + // extended. Make that explicit here. + Elt = getAnyExtOrTrunc(Elt, DL, VT); + + return Elt; + } + + // EXTRACT_VECTOR_ELT of INSERT_VECTOR_ELT is often formed when vector + // operations are lowered to scalars. 
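// e.g. extract_elt(insert_elt(V, X, 2), 2) is simply X, while two different constant
// indices let the extract read straight from the original vector V.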
+ if (N1.getOpcode() == ISD::INSERT_VECTOR_ELT) { + // If the indices are the same, return the inserted element else + // if the indices are known different, extract the element from + // the original vector. + SDValue N1Op2 = N1.getOperand(2); + ConstantSDNode *N1Op2C = dyn_cast<ConstantSDNode>(N1Op2); + + if (N1Op2C && N2C) { + if (N1Op2C->getZExtValue() == N2C->getZExtValue()) { + if (VT == N1.getOperand(1).getValueType()) + return N1.getOperand(1); + else + return getSExtOrTrunc(N1.getOperand(1), DL, VT); + } + + return getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, N1.getOperand(0), N2); + } + } + break; + case ISD::EXTRACT_ELEMENT: + assert(N2C && (unsigned)N2C->getZExtValue() < 2 && "Bad EXTRACT_ELEMENT!"); + assert(!N1.getValueType().isVector() && !VT.isVector() && + (N1.getValueType().isInteger() == VT.isInteger()) && + N1.getValueType() != VT && + "Wrong types for EXTRACT_ELEMENT!"); + + // EXTRACT_ELEMENT of BUILD_PAIR is often formed while legalize is expanding + // 64-bit integers into 32-bit parts. Instead of building the extract of + // the BUILD_PAIR, only to have legalize rip it apart, just do it now. + if (N1.getOpcode() == ISD::BUILD_PAIR) + return N1.getOperand(N2C->getZExtValue()); + + // EXTRACT_ELEMENT of a constant int is also very common. + if (N1C) { + unsigned ElementSize = VT.getSizeInBits(); + unsigned Shift = ElementSize * N2C->getZExtValue(); + APInt ShiftedVal = N1C->getAPIntValue().lshr(Shift); + return getConstant(ShiftedVal.trunc(ElementSize), DL, VT); + } + break; + case ISD::EXTRACT_SUBVECTOR: + if (VT.isSimple() && N1.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + "Extract subvector VTs must be a vectors!"); + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && + "Extract subvector VTs must have the same element type!"); + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && + "Extract subvector must be from larger vector to smaller vector!"); + + if (N2C) { + assert((VT.getVectorNumElements() + N2C->getZExtValue() + <= N1.getValueType().getVectorNumElements()) + && "Extract subvector overflow!"); + } + + // Trivial extraction. + if (VT.getSimpleVT() == N1.getSimpleValueType()) + return N1; + } + break; + } + + // Perform trivial constant folding. + if (SDValue SV = + FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode())) + return SV; + + // Constant fold FP operations. 
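+  // Sketch of the folds performed below (constants are hypothetical):
+  //   (fadd ConstantFP:f64<1.5>, ConstantFP:f64<2.25>) --> ConstantFP:f64<3.75>
+  // If the target reports that it honors FP exceptions, a fold whose APFloat
+  // status is opInvalidOp (or, for fdiv/frem, opDivByZero) is skipped so the
+  // trap is not constant-folded away.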
+ bool HasFPExceptions = TLI->hasFloatingPointExceptions(); + if (N1CFP) { + if (N2CFP) { + APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF(); + APFloat::opStatus s; + switch (Opcode) { + case ISD::FADD: + s = V1.add(V2, APFloat::rmNearestTiesToEven); + if (!HasFPExceptions || s != APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); + break; + case ISD::FSUB: + s = V1.subtract(V2, APFloat::rmNearestTiesToEven); + if (!HasFPExceptions || s!=APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); + break; + case ISD::FMUL: + s = V1.multiply(V2, APFloat::rmNearestTiesToEven); + if (!HasFPExceptions || s!=APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); + break; + case ISD::FDIV: + s = V1.divide(V2, APFloat::rmNearestTiesToEven); + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { + return getConstantFP(V1, DL, VT); + } + break; + case ISD::FREM : + s = V1.mod(V2); + if (!HasFPExceptions || (s!=APFloat::opInvalidOp && + s!=APFloat::opDivByZero)) { + return getConstantFP(V1, DL, VT); + } + break; + case ISD::FCOPYSIGN: + V1.copySign(V2); + return getConstantFP(V1, DL, VT); + default: break; + } + } + + if (Opcode == ISD::FP_ROUND) { + APFloat V = N1CFP->getValueAPF(); // make copy + bool ignored; + // This can return overflow, underflow, or inexact; we don't care. + // FIXME need to be more flexible about rounding mode. + (void)V.convert(EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &ignored); + return getConstantFP(V, DL, VT); + } + } + + // Canonicalize an UNDEF to the RHS, even over a constant. + if (N1.getOpcode() == ISD::UNDEF) { + if (isCommutativeBinOp(Opcode)) { + std::swap(N1, N2); + } else { + switch (Opcode) { + case ISD::FP_ROUND_INREG: + case ISD::SIGN_EXTEND_INREG: + case ISD::SUB: + case ISD::FSUB: + case ISD::FDIV: + case ISD::FREM: + case ISD::SRA: + return N1; // fold op(undef, arg2) -> undef + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, DL, VT); // fold op(undef, arg2) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N2; + } + } + } + + // Fold a bunch of operators when the RHS is undef. + if (N2.getOpcode() == ISD::UNDEF) { + switch (Opcode) { + case ISD::XOR: + if (N1.getOpcode() == ISD::UNDEF) + // Handle undef ^ undef -> 0 special case. This is a common + // idiom (misuse). + return getConstant(0, DL, VT); + // fallthrough + case ISD::ADD: + case ISD::ADDC: + case ISD::ADDE: + case ISD::SUB: + case ISD::UDIV: + case ISD::SDIV: + case ISD::UREM: + case ISD::SREM: + return N2; // fold op(arg1, undef) -> undef + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (getTarget().Options.UnsafeFPMath) + return N2; + break; + case ISD::MUL: + case ISD::AND: + case ISD::SRL: + case ISD::SHL: + if (!VT.isVector()) + return getConstant(0, DL, VT); // fold op(arg1, undef) -> 0 + // For vectors, we can't easily build an all zero vector, just return + // the LHS. + return N1; + case ISD::OR: + if (!VT.isVector()) + return getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, VT); + // For vectors, we can't easily build an all one vector, just return + // the LHS. + return N1; + case ISD::SRA: + return N1; + } + } + + // Memoize this node if possible. 
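+  // CSE sketch: two requests for the same (Opcode, VTs, operands, flags)
+  // tuple produce the same FoldingSetNodeID and return the node already in
+  // CSEMap, e.g. (operands X and Y are hypothetical):
+  //   SDValue A = DAG.getNode(ISD::ADD, DL, MVT::i32, X, Y);
+  //   SDValue B = DAG.getNode(ISD::ADD, DL, MVT::i32, X, Y);  // B == A
+  // Nodes producing MVT::Glue are never CSE'd.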
+ BinarySDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { + SDValue Ops[] = {N1, N2}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + AddNodeIDFlags(ID, Opcode, Flags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + + CSEMap.InsertNode(N, IP); + } else { + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); + } + + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3) { + // Perform various simplifications. + switch (Opcode) { + case ISD::FMA: { + ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); + ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); + ConstantFPSDNode *N3CFP = dyn_cast<ConstantFPSDNode>(N3); + if (N1CFP && N2CFP && N3CFP) { + APFloat V1 = N1CFP->getValueAPF(); + const APFloat &V2 = N2CFP->getValueAPF(); + const APFloat &V3 = N3CFP->getValueAPF(); + APFloat::opStatus s = + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp) + return getConstantFP(V1, DL, VT); + } + break; + } + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::SETCC: { + // Use FoldSetCC to simplify SETCC's. + if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL)) + return V; + // Vector constant folding. + SDValue Ops[] = {N1, N2, N3}; + if (SDValue V = FoldConstantVectorArithmetic(Opcode, DL, VT, Ops)) + return V; + break; + } + case ISD::SELECT: + if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) { + if (N1C->getZExtValue()) + return N2; // select true, X, Y -> X + return N3; // select false, X, Y -> Y + } + + if (N2 == N3) return N2; // select C, X, X -> X + break; + case ISD::VECTOR_SHUFFLE: + llvm_unreachable("should use getVectorShuffle constructor!"); + case ISD::INSERT_SUBVECTOR: { + SDValue Index = N3; + if (VT.isSimple() && N1.getValueType().isSimple() + && N2.getValueType().isSimple()) { + assert(VT.isVector() && N1.getValueType().isVector() && + N2.getValueType().isVector() && + "Insert subvector VTs must be a vectors"); + assert(VT == N1.getValueType() && + "Dest and insert subvector source types must match!"); + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && + "Insert subvector must be from smaller vector to larger vector!"); + if (isa<ConstantSDNode>(Index)) { + assert((N2.getValueType().getVectorNumElements() + + cast<ConstantSDNode>(Index)->getZExtValue() + <= VT.getVectorNumElements()) + && "Insert subvector overflow!"); + } + + // Trivial insertion. + if (VT.getSimpleVT() == N2.getSimpleValueType()) + return N2; + } + break; + } + case ISD::BITCAST: + // Fold bit_convert nodes from a type to themselves. + if (N1.getValueType() == VT) + return N1; + break; + } + + // Memoize node if it doesn't produce a flag. 
+ SDNode *N; + SDVTList VTs = getVTList(VT); + if (VT != MVT::Glue) { + SDValue Ops[] = { N1, N2, N3 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); + } + + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VT, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VT, Ops); +} + +/// getStackArgumentTokenFactor - Compute a TokenFactor to force all +/// the incoming stack arguments to be loaded from the stack. +SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { + SmallVector<SDValue, 8> ArgChains; + + // Include the original chain at the beginning of the list. When this is + // used by target LowerCall hooks, this helps legalize find the + // CALLSEQ_BEGIN node. + ArgChains.push_back(Chain); + + // Add a chain value for each stack argument. + for (SDNode::use_iterator U = getEntryNode().getNode()->use_begin(), + UE = getEntryNode().getNode()->use_end(); U != UE; ++U) + if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U)) + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) + if (FI->getIndex() < 0) + ArgChains.push_back(SDValue(L, 1)); + + // Build a tokenfactor for all the chains. + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); +} + +/// getMemsetValue - Vectorized representation of the memset value +/// operand. +static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, + SDLoc dl) { + assert(Value.getOpcode() != ISD::UNDEF); + + unsigned NumBits = VT.getScalarType().getSizeInBits(); + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) { + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); + if (VT.isInteger()) + return DAG.getConstant(Val, dl, VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), dl, + VT); + } + + assert(Value.getValueType() == MVT::i8 && "memset with non-byte fill value?"); + EVT IntVT = VT.getScalarType(); + if (!IntVT.isInteger()) + IntVT = EVT::getIntegerVT(*DAG.getContext(), IntVT.getSizeInBits()); + + Value = DAG.getNode(ISD::ZERO_EXTEND, dl, IntVT, Value); + if (NumBits > 8) { + // Use a multiplication with 0x010101... to extend the input to the + // required length. + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + Value = DAG.getNode(ISD::MUL, dl, IntVT, Value, + DAG.getConstant(Magic, dl, IntVT)); + } + + if (VT != Value.getValueType() && !VT.isInteger()) + Value = DAG.getNode(ISD::BITCAST, dl, VT.getScalarType(), Value); + if (VT != Value.getValueType()) { + assert(VT.getVectorElementType() == Value.getValueType() && + "value type should be one vector element here"); + SmallVector<SDValue, 8> BVOps(VT.getVectorNumElements(), Value); + Value = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BVOps); + } + + return Value; +} + +/// getMemsetStringVal - Similar to getMemsetValue. 
Except this is only +/// used when a memcpy is turned into a memset when the source is a constant +/// string ptr. +static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, + const TargetLowering &TLI, StringRef Str) { + // Handle vector with all elements zero. + if (Str.empty()) { + if (VT.isInteger()) + return DAG.getConstant(0, dl, VT); + else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + return DAG.getConstantFP(0.0, dl, VT); + else if (VT.isVector()) { + unsigned NumElts = VT.getVectorNumElements(); + MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64; + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getConstant(0, dl, + EVT::getVectorVT(*DAG.getContext(), + EltVT, NumElts))); + } else + llvm_unreachable("Expected type!"); + } + + assert(!VT.isVector() && "Can't handle vector type here!"); + unsigned NumVTBits = VT.getSizeInBits(); + unsigned NumVTBytes = NumVTBits / 8; + unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); + + APInt Val(NumVTBits, 0); + if (DAG.getDataLayout().isLittleEndian()) { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << i*8; + } else { + for (unsigned i = 0; i != NumBytes; ++i) + Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; + } + + // If the "cost" of materializing the integer immediate is less than the cost + // of a load, then it is cost effective to turn the load into the immediate. + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + if (TLI.shouldConvertConstantLoadToIntImm(Val, Ty)) + return DAG.getConstant(Val, dl, VT); + return SDValue(nullptr, 0); +} + +/// getMemBasePlusOffset - Returns base and offset node for the +/// +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, + SelectionDAG &DAG) { + EVT VT = Base.getValueType(); + return DAG.getNode(ISD::ADD, dl, + VT, Base, DAG.getConstant(Offset, dl, VT)); +} + +/// isMemSrcFromString - Returns true if memcpy source is a string constant. +/// +static bool isMemSrcFromString(SDValue Src, StringRef &Str) { + unsigned SrcDelta = 0; + GlobalAddressSDNode *G = nullptr; + if (Src.getOpcode() == ISD::GlobalAddress) + G = cast<GlobalAddressSDNode>(Src); + else if (Src.getOpcode() == ISD::ADD && + Src.getOperand(0).getOpcode() == ISD::GlobalAddress && + Src.getOperand(1).getOpcode() == ISD::Constant) { + G = cast<GlobalAddressSDNode>(Src.getOperand(0)); + SrcDelta = cast<ConstantSDNode>(Src.getOperand(1))->getZExtValue(); + } + if (!G) + return false; + + return getConstantStringInfo(G->getGlobal(), Str, SrcDelta, false); +} + +/// Determines the optimal series of memory ops to replace the memset / memcpy. +/// Return true if the number of memory ops is below the threshold (Limit). +/// It returns the types of the sequence of memory ops to perform +/// memset / memcpy by reference. +static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps, + unsigned Limit, uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool IsMemset, + bool ZeroMemset, + bool MemcpyStrSrc, + bool AllowOverlap, + SelectionDAG &DAG, + const TargetLowering &TLI) { + assert((SrcAlign == 0 || SrcAlign >= DstAlign) && + "Expecting memcpy / memset source to meet alignment requirement!"); + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. 
If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, + IsMemset, ZeroMemset, MemcpyStrSrc, + DAG.getMachineFunction()); + + if (VT == MVT::Other) { + unsigned AS = 0; + if (DstAlign >= DAG.getDataLayout().getPointerPrefAlignment(AS) || + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign)) { + VT = TLI.getPointerTy(DAG.getDataLayout()); + } else { + switch (DstAlign & 7) { + case 0: VT = MVT::i64; break; + case 4: VT = MVT::i32; break; + case 2: VT = MVT::i16; break; + default: VT = MVT::i8; break; + } + } + + MVT LVT = MVT::i64; + while (!TLI.isTypeLegal(LVT)) + LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1); + assert(LVT.isInteger()); + + if (VT.bitsGT(LVT)) + VT = LVT; + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned VTSize = VT.getSizeInBits() / 8; + while (VTSize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; + if (VT.isVector() || VT.isFloatingPoint()) { + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && + TLI.isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + TLI.isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. + bool Fast; + unsigned AS = 0; + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && + TLI.allowsMisalignedMemoryAccesses(VT, AS, DstAlign, &Fast) && Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(VT); + Size -= VTSize; + } + + return true; +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction()->optForMinSize(); + return MF.getFunction()->optForSize(); +} + +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memcpy of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memcpy to a series of load and store ops if the size operand falls + // below a certain threshold. + // TODO: In the AlwaysInline case, if the size is big then generate a loop + // rather than maybe a humongous number of loads and stores. 
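+  // Rough illustration (target-dependent, sizes hypothetical): a 15-byte
+  // memcpy on a 64-bit target might be lowered with
+  //   MemOps = { i64, i32, i16, i8 }   // no overlapping accesses, or
+  //   MemOps = { i64, i64 }            // second op overlaps the first by 1 byte
+  // depending on what FindOptimalMemOpLowering decides about alignment and
+  // the cost of misaligned accesses.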
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + StringRef Str; + bool CopyFromStr = isMemSrcFromString(Src, Str); + bool isZeroStr = CopyFromStr && Str.empty(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), + (isZeroStr ? 0 : SrcAlign), + false, false, CopyFromStr, true, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Align && + DAG.getDataLayout().exceedsNaturalStackAlignment(NewAlign)) + NewAlign /= 2; + + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + uint64_t SrcOff = 0, DstOff = 0; + for (unsigned i = 0; i != NumMemOps; ++i) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value, Store; + + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + SrcOff -= VTSize - Size; + DstOff -= VTSize - Size; + } + + if (CopyFromStr && + (isZeroStr || (VT.isInteger() && !VT.isVector()))) { + // It's unlikely a store of a vector immediate can be done in a single + // instruction. It would require a load from a constantpool first. + // We only handle zero vectors here. + // FIXME: Handle other cases where store of vector immediate is done in + // a single instruction. + Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); + if (Value.getNode()) + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, dl, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } + + if (!Store.getNode()) { + // The type might not be legal for the target. This should only happen + // if the type is smaller than a legal type, as on PPC, so the right + // thing to do is generate a LoadExt/StoreTrunc pair. These simplify + // to Load/Store if NVT==VT. + // FIXME does the case above also need this? 
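+      // E.g. on a hypothetical target where i8 is promoted to i32, an i8
+      // chunk is copied as an any-extending i8->i32 load followed by an
+      // i32->i8 truncating store.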
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + assert(NVT.bitsGE(VT)); + Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + getMemBasePlusOffset(Src, SrcOff, dl, DAG), + SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, + false, MinAlign(SrcAlign, SrcOff)); + Store = DAG.getTruncStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, dl, DAG), + DstPtrInfo.getWithOffset(DstOff), VT, isVol, + false, Align); + } + OutChains.push_back(Store); + SrcOff += VTSize; + DstOff += VTSize; + Size -= VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + bool AlwaysInline, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + // Turn a memmove of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memmove to a series of load and store ops if the size operand falls + // below a certain threshold. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + unsigned SrcAlign = DAG.InferPtrAlignment(Src); + if (Align > SrcAlign) + SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); + + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + uint64_t SrcOff = 0, DstOff = 0; + SmallVector<SDValue, 8> LoadValues; + SmallVector<SDValue, 8> LoadChains; + SmallVector<SDValue, 8> OutChains; + unsigned NumMemOps = MemOps.size(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Value; + + Value = DAG.getLoad(VT, dl, Chain, + getMemBasePlusOffset(Src, SrcOff, dl, DAG), + SrcPtrInfo.getWithOffset(SrcOff), isVol, + false, false, SrcAlign); + LoadValues.push_back(Value); + LoadChains.push_back(Value.getValue(1)); + SrcOff += VTSize; + } + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains); + OutChains.clear(); + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + SDValue Store; + + Store = DAG.getStore(Chain, dl, LoadValues[i], + getMemBasePlusOffset(Dst, DstOff, dl, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); + OutChains.push_back(Store); + DstOff += VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. 
+/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. +/// \returns New head in the control flow, if lowering was successful, empty +/// SDValue otherwise. +/// +/// The function tries to replace 'llvm.memset' intrinsic with several store +/// operations and value calculation code. This is usually profitable for small +/// memory size. +static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, + SDValue Chain, SDValue Dst, + SDValue Src, uint64_t Size, + unsigned Align, bool isVol, + MachinePointerInfo DstPtrInfo) { + // Turn a memset of undef to nop. + if (Src.getOpcode() == ISD::UNDEF) + return Chain; + + // Expand memset to a series of load/store ops if the size operand + // falls below a certain threshold. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::vector<EVT> MemOps; + bool DstAlignCanChange = false; + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst); + if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) + DstAlignCanChange = true; + bool IsZeroVal = + isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue(); + if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), + Size, (DstAlignCanChange ? 0 : Align), 0, + true, IsZeroVal, false, true, DAG, TLI)) + return SDValue(); + + if (DstAlignCanChange) { + Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); + unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty); + if (NewAlign > Align) { + // Give the stack frame object a larger alignment if needed. + if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) + MFI->setObjectAlignment(FI->getIndex(), NewAlign); + Align = NewAlign; + } + } + + SmallVector<SDValue, 8> OutChains; + uint64_t DstOff = 0; + unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + + for (unsigned i = 0; i < NumMemOps; i++) { + EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + DstOff -= VTSize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
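+    // E.g. for a 13-byte memset of the (hypothetical) byte 0xAB with
+    // MemOps = { i64, i32, i8 }, the i64 pattern 0xABABABABABABABAB is built
+    // once and the i32/i8 store values are just truncations of it whenever
+    // the target reports the truncate as free.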
+ SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); + SDValue Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, dl, DAG), + DstPtrInfo.getWithOffset(DstOff), + isVol, false, Align); + OutChains.push_back(Store); + DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; + } + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); +} + +static void checkAddrSpaceIsValidForLibcall(const TargetLowering *TLI, + unsigned AS) { + // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all + // pointer operands can be losslessly bitcasted to pointers of address space 0 + if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + report_fatal_error("cannot lower memory intrinsic in address space " + + Twine(AS)); + } +} + +SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, bool AlwaysInline, + bool isTailCall, MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memcpy to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memcpy with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(),Align, + isVol, false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memcpy with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemcpy( + *this, dl, Chain, Dst, Src, Size, Align, isVol, AlwaysInline, + DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // If we really need inline code and the target declined to provide it, + // use a (potentially long) sequence of loads and stores. + if (AlwaysInline) { + assert(ConstantSize && "AlwaysInline requires a constant size!"); + return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + true, DstPtrInfo, SrcPtrInfo); + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + + // FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc + // memcpy is not guaranteed to be safe. libc memcpys aren't required to + // respect volatile, so they may do things like read or write memory + // beyond the given memory regions. But fixing this isn't easy, and most + // people don't care. + + // Emit a library call. 
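+  // The libcall passes Dst, Src and Size as pointer-width integers (IntPtrTy)
+  // and discards the result, roughly:
+  //   memcpy(dst, src, size);   // name/CC taken from RTLIB::MEMCPY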
+ TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, bool isTailCall, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memmove to loads and stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memmove with size zero? Just return the original chain. + if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src, + ConstantSize->getZExtValue(), Align, isVol, + false, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memmove with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemmove( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo, SrcPtrInfo); + if (Result.getNode()) + return Result; + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + checkAddrSpaceIsValidForLibcall(TLI, SrcPtrInfo.getAddrSpace()); + + // FIXME: If the memmove is volatile, lowering it to plain libc memmove may + // not be safe. See memcpy above for more details. + + // Emit a library call. + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Ty = getDataLayout().getIntPtrType(*getContext()); + Entry.Node = Dst; Args.push_back(Entry); + Entry.Node = Src; Args.push_back(Entry); + Entry.Node = Size; Args.push_back(Entry); + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, + SDValue Src, SDValue Size, + unsigned Align, bool isVol, bool isTailCall, + MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); + + // Check to see if we should lower the memset to stores first. + // For cases within the target-specified limits, this is the best choice. + ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + if (ConstantSize) { + // Memset with size zero? Just return the original chain. 
+ if (ConstantSize->isNullValue()) + return Chain; + + SDValue Result = + getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), + Align, isVol, DstPtrInfo); + + if (Result.getNode()) + return Result; + } + + // Then check to see if we should lower the memset with target-specific + // code. If the target chooses to do this, this is the next best. + if (TSI) { + SDValue Result = TSI->EmitTargetCodeForMemset( + *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + if (Result.getNode()) + return Result; + } + + checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + + // Emit a library call. + Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext()); + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Entry.Node = Dst; Entry.Ty = IntPtrTy; + Args.push_back(Entry); + Entry.Node = Src; + Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); + Args.push_back(Entry); + Entry.Node = Size; + Entry.Ty = IntPtrTy; + Args.push_back(Entry); + + // FIXME: pass in SDLoc + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Type::getVoidTy(*getContext()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(getDataLayout())), + std::move(Args), 0) + .setDiscardResult() + .setTailCall(isTailCall); + + std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); + return CallResult.second; +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<AtomicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + unsigned NumOps = Ops.size(); + SDUse *DynOps = NumOps > 4 ? 
OperandAllocator.Allocate<SDUse>(NumOps) + : nullptr; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops.data(), DynOps, NumOps, MMO, + SuccessOrdering, FailureOrdering, + SynchScope); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, ArrayRef<SDValue> Ops, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomicCmpSwap( + unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment, AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. + unsigned Flags = MachineMemOperand::MOVolatile; + Flags |= MachineMemOperand::MOLoad; + Flags |= MachineMemOperand::MOStore; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); + + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, + SuccessOrdering, FailureOrdering, SynchScope); +} + +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + + SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, + SuccessOrdering, FailureOrdering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + const Value* PtrVal, + unsigned Alignment, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + // An atomic store does not load. An atomic load does not store. + // (An atomicrmw obviously both loads and stores.) + // For now, atomics are considered to be volatile always, and they are + // chained as such. + // FIXME: Volatile isn't really correct; we should keep track of atomic + // orderings in the memoperand. 
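+  // Resulting memoperand flags, given the FIXME above:
+  //   ATOMIC_LOAD    -> MOVolatile | MOLoad
+  //   ATOMIC_STORE   -> MOVolatile | MOStore
+  //   anything else  -> MOVolatile | MOLoad | MOStore   (read-modify-write)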
+ unsigned Flags = MachineMemOperand::MOVolatile; + if (Opcode != ISD::ATOMIC_STORE) + Flags |= MachineMemOperand::MOLoad; + if (Opcode != ISD::ATOMIC_LOAD) + Flags |= MachineMemOperand::MOStore; + + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags, + MemVT.getStoreSize(), Alignment); + + return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO, + Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDValue Chain, + SDValue Ptr, SDValue Val, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert((Opcode == ISD::ATOMIC_LOAD_ADD || + Opcode == ISD::ATOMIC_LOAD_SUB || + Opcode == ISD::ATOMIC_LOAD_AND || + Opcode == ISD::ATOMIC_LOAD_OR || + Opcode == ISD::ATOMIC_LOAD_XOR || + Opcode == ISD::ATOMIC_LOAD_NAND || + Opcode == ISD::ATOMIC_LOAD_MIN || + Opcode == ISD::ATOMIC_LOAD_MAX || + Opcode == ISD::ATOMIC_LOAD_UMIN || + Opcode == ISD::ATOMIC_LOAD_UMAX || + Opcode == ISD::ATOMIC_SWAP || + Opcode == ISD::ATOMIC_STORE) && + "Invalid Atomic Op"); + + EVT VT = Val.getValueType(); + + SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) : + getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr, Val}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + EVT VT, SDValue Chain, + SDValue Ptr, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = {Chain, Ptr}; + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope); +} + +/// getMergeValues - Create a MERGE_VALUES node from the given operands. +SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, SDLoc dl) { + if (Ops.size() == 1) + return Ops[0]; + + SmallVector<EVT, 4> VTs; + VTs.reserve(Ops.size()); + for (unsigned i = 0; i < Ops.size(); ++i) + VTs.push_back(Ops[i].getValueType()); + return getNode(ISD::MERGE_VALUES, dl, getVTList(VTs), Ops); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, + ArrayRef<SDValue> Ops, + EVT MemVT, MachinePointerInfo PtrInfo, + unsigned Align, bool Vol, + bool ReadMem, bool WriteMem, unsigned Size) { + if (Align == 0) // Ensure that codegen never sees alignment 0 + Align = getEVTAlignment(MemVT); + + MachineFunction &MF = getMachineFunction(); + unsigned Flags = 0; + if (WriteMem) + Flags |= MachineMemOperand::MOStore; + if (ReadMem) + Flags |= MachineMemOperand::MOLoad; + if (Vol) + Flags |= MachineMemOperand::MOVolatile; + if (!Size) + Size = MemVT.getStoreSize(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, Size, Align); + + return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO); +} + +SDValue +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, + ArrayRef<SDValue> Ops, EVT MemVT, + MachineMemOperand *MMO) { + assert((Opcode == ISD::INTRINSIC_VOID || + Opcode == ISD::INTRINSIC_W_CHAIN || + Opcode == ISD::PREFETCH || + Opcode == ISD::LIFETIME_START || + Opcode == ISD::LIFETIME_END || + (Opcode <= INT_MAX && + (int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) && + "Opcode is not a memory-accessing opcode!"); + + // Memoize the node unless it returns a flag. 
+ MemIntrinsicSDNode *N; + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + MemVT, MMO); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + MemVT, MMO); + } + InsertNode(N); + return SDValue(N, 0); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + int64_t Offset = 0) { + // If this is FI+Offset, we can model it. + if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) + return MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + FI->getIndex(), Offset); + + // If this is (FI+Offset1)+Offset2, we can model it. + if (Ptr.getOpcode() != ISD::ADD || + !isa<ConstantSDNode>(Ptr.getOperand(1)) || + !isa<FrameIndexSDNode>(Ptr.getOperand(0))) + return MachinePointerInfo(); + + int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + return MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI, + Offset + cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue()); +} + +/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a +/// MachinePointerInfo record from it. This is particularly useful because the +/// code generator has many cases where it doesn't bother passing in a +/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst". +static MachinePointerInfo InferPointerInfo(SelectionDAG &DAG, SDValue Ptr, + SDValue OffsetOp) { + // If the 'Offset' value isn't a constant, we can't handle this. + if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp)) + return InferPointerInfo(DAG, Ptr, OffsetNode->getSExtValue()); + if (OffsetOp.getOpcode() == ISD::UNDEF) + return InferPointerInfo(DAG, Ptr); + return MachinePointerInfo(); +} + + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Offset, + MachinePointerInfo PtrInfo, EVT MemVT, + bool isVolatile, bool isNonTemporal, bool isInvariant, + unsigned Alignment, const AAMDNodes &AAInfo, + const MDNode *Ranges) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(VT); + + unsigned Flags = MachineMemOperand::MOLoad; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; + + // If we don't have a PtrInfo, infer the trivial frame index case to simplify + // clients. 
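+  // E.g. a base pointer of (add FrameIndex:i64<3>, Constant:i64<8>) with no
+  // explicit PtrInfo is modeled as fixed-stack object #3 at offset 8; a base
+  // that is not FI or FI+constant falls back to an unknown MachinePointerInfo.
+  // (The operand types above are illustrative.)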
+ if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(*this, Ptr, Offset); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment, + AAInfo, Ranges); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); +} + +SDValue +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Offset, EVT MemVT, + MachineMemOperand *MMO) { + if (VT == MemVT) { + ExtType = ISD::NON_EXTLOAD; + } else if (ExtType == ISD::NON_EXTLOAD) { + assert(VT == MemVT && "Non-extending load from different memory type!"); + } else { + // Extending load. + assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be an extending load, not truncating!"); + assert(VT.isInteger() == MemVT.isInteger() && + "Cannot convert from FP to Int or Int -> FP!"); + assert(VT.isVector() == MemVT.isVector() && + "Cannot use an ext load to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == MemVT.getVectorNumElements()) && + "Cannot use an ext load to change the number of vector elements!"); + } + + bool Indexed = AM != ISD::UNINDEXED; + assert((Indexed || Offset.getOpcode() == ISD::UNDEF) && + "Unindexed load with an offset!"); + + SDVTList VTs = Indexed ? + getVTList(VT, Ptr.getValueType(), MVT::Other) : getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::LOAD, VTs, Ops); + ID.AddInteger(MemVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<LoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, + MemVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachinePointerInfo PtrInfo, + bool isVolatile, bool isNonTemporal, + bool isInvariant, unsigned Alignment, + const AAMDNodes &AAInfo, + const MDNode *Ranges) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + AAInfo, Ranges); +} + +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, + MachinePointerInfo PtrInfo, EVT MemVT, + bool isVolatile, bool isNonTemporal, + bool isInvariant, unsigned Alignment, + const AAMDNodes &AAInfo) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + PtrInfo, MemVT, isVolatile, isNonTemporal, isInvariant, + Alignment, AAInfo); +} + + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, 
MMO); +} + +SDValue +SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + LoadSDNode *LD = cast<LoadSDNode>(OrigLoad); + assert(LD->getOffset().getOpcode() == ISD::UNDEF && + "Load is already a indexed load!"); + return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, + LD->getChain(), Base, Offset, LD->getPointerInfo(), + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); +} + +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, MachinePointerInfo PtrInfo, + bool isVolatile, bool isNonTemporal, + unsigned Alignment, const AAMDNodes &AAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(Val.getValueType()); + + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(*this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, + Val.getValueType().getStoreSize(), Alignment, + AAInfo); + + return getStore(Chain, dl, Val, Ptr, MMO); +} + +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, MachineMemOperand *MMO) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, MachinePointerInfo PtrInfo, + EVT SVT,bool isVolatile, bool isNonTemporal, + unsigned Alignment, + const AAMDNodes &AAInfo) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = getEVTAlignment(SVT); + + unsigned Flags = MachineMemOperand::MOStore; + if (isVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (isNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + + if (PtrInfo.V.isNull()) + PtrInfo = InferPointerInfo(*this, Ptr); + + MachineFunction &MF = getMachineFunction(); + MachineMemOperand *MMO = + MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment, + AAInfo); + + return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); +} + +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, EVT SVT, + MachineMemOperand *MMO) { + EVT VT = Val.getValueType(); + + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + if (VT == SVT) + return getStore(Chain, dl, Val, Ptr, MMO); + + 
assert(SVT.getScalarType().bitsLT(VT.getScalarType()) && + "Should only be a truncating store, not extending!"); + assert(VT.isInteger() == SVT.isInteger() && + "Can't do FP-INT conversion!"); + assert(VT.isVector() == SVT.isVector() && + "Cannot use trunc store to convert to or from a vector!"); + assert((!VT.isVector() || + VT.getVectorNumElements() == SVT.getVectorNumElements()) && + "Cannot use trunc store to change the number of vector elements!"); + + SDVTList VTs = getVTList(MVT::Other); + SDValue Undef = getUNDEF(Ptr.getValueType()); + SDValue Ops[] = { Chain, Val, Ptr, Undef }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); + ID.AddInteger(SVT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<StoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, + SDValue Offset, ISD::MemIndexedMode AM) { + StoreSDNode *ST = cast<StoreSDNode>(OrigStore); + assert(ST->getOffset().getOpcode() == ISD::UNDEF && + "Store is already a indexed store!"); + SDVTList VTs = getVTList(Base.getValueType(), MVT::Other); + SDValue Ops[] = { ST->getChain(), ST->getValue(), Base, Offset }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::STORE, VTs, Ops); + ID.AddInteger(ST->getMemoryVT().getRawBits()); + ID.AddInteger(ST->getRawSubclassData()); + ID.AddInteger(ST->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, + ST->isTruncatingStore(), + ST->getMemoryVT(), + ST->getMemOperand()); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getMaskedLoad(EVT VT, SDLoc dl, SDValue Chain, + SDValue Ptr, SDValue Mask, SDValue Src0, EVT MemVT, + MachineMemOperand *MMO, ISD::LoadExtType ExtTy) { + + SDVTList VTs = getVTList(VT, MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Src0 }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedLoadSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedLoadSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, VTs, + ExtTy, MemVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedStore(SDValue Chain, SDLoc dl, SDValue Val, + SDValue Ptr, SDValue Mask, EVT MemVT, + MachineMemOperand *MMO, bool isTrunc) { + assert(Chain.getValueType() == MVT::Other && + "Invalid chain type"); + EVT VT = Val.getValueType(); + SDVTList VTs = getVTList(MVT::Other); + SDValue Ops[] = { Chain, Ptr, Mask, Val }; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops); + 
ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedStoreSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = new (NodeAllocator) MaskedStoreSDNode(dl.getIROrder(), + dl.getDebugLoc(), Ops, 4, + VTs, isTrunc, MemVT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue +SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED, + MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedGatherSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + MaskedGatherSDNode *N = + new (NodeAllocator) MaskedGatherSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, SDLoc dl, + ArrayRef<SDValue> Ops, + MachineMemOperand *MMO) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops); + ID.AddInteger(VT.getRawBits()); + ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), + MMO->isInvariant())); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, dl.getDebugLoc(), IP)) { + cast<MaskedScatterSDNode>(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + SDNode *N = + new (NodeAllocator) MaskedScatterSDNode(dl.getIROrder(), dl.getDebugLoc(), + Ops, VTs, VT, MMO); + CSEMap.InsertNode(N, IP); + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, + ArrayRef<SDUse> Ops) { + switch (Ops.size()) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, static_cast<const SDValue>(Ops[0])); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1]); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + // Copy from an SDUse array into an SDValue array for use with + // the regular getNode logic. + SmallVector<SDValue, 8> NewOps(Ops.begin(), Ops.end()); + return getNode(Opcode, DL, VT, NewOps); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, + ArrayRef<SDValue> Ops, const SDNodeFlags *Flags) { + unsigned NumOps = Ops.size(); + switch (NumOps) { + case 0: return getNode(Opcode, DL, VT); + case 1: return getNode(Opcode, DL, VT, Ops[0]); + case 2: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Flags); + case 3: return getNode(Opcode, DL, VT, Ops[0], Ops[1], Ops[2]); + default: break; + } + + switch (Opcode) { + default: break; + case ISD::CONCAT_VECTORS: { + // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF. 
+ if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this)) + return V; + break; + } + case ISD::SELECT_CC: { + assert(NumOps == 5 && "SELECT_CC takes 5 operands!"); + assert(Ops[0].getValueType() == Ops[1].getValueType() && + "LHS and RHS of condition must have same type!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "True and False arms of SelectCC must have same type!"); + assert(Ops[2].getValueType() == VT && + "select_cc node must be of same type as true and false value!"); + break; + } + case ISD::BR_CC: { + assert(NumOps == 5 && "BR_CC takes 5 operands!"); + assert(Ops[2].getValueType() == Ops[3].getValueType() && + "LHS/RHS of comparison should match types!"); + break; + } + } + + // Memoize nodes. + SDNode *N; + SDVTList VTs = getVTList(VT); + + if (VT != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTs, Ops); + void *IP = nullptr; + + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops); + CSEMap.InsertNode(N, IP); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops); + } + + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, + ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops) { + return getNode(Opcode, DL, getVTList(ResultTys), Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + ArrayRef<SDValue> Ops) { + if (VTList.NumVTs == 1) + return getNode(Opcode, DL, VTList.VTs[0], Ops); + +#if 0 + switch (Opcode) { + // FIXME: figure out how to safely handle things like + // int foo(int x) { return 1 << (x & 255); } + // int bar() { return foo(256); } + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + case ISD::SHL_PARTS: + if (N3.getOpcode() == ISD::SIGN_EXTEND_INREG && + cast<VTSDNode>(N3.getOperand(1))->getVT() != MVT::i1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + else if (N3.getOpcode() == ISD::AND) + if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) { + // If the and is only masking out bits that cannot effect the shift, + // eliminate the and. + unsigned NumBits = VT.getScalarType().getSizeInBits()*2; + if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1) + return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0)); + } + break; + } +#endif + + // Memoize the node unless it returns a flag. 
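+// --- Editor's note: the memoization skeleton, sketched for reference ---
+// Every node-creation path in this file that is allowed to CSE follows the
+// same pattern (identifiers are the members and helpers already used above):
+//
+//   FoldingSetNodeID ID;
+//   AddNodeIDNode(ID, Opcode, VTs, Ops);     // fold opcode, value types, operands
+//   void *IP = nullptr;
+//   if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP))
+//     return SDValue(E, 0);                  // reuse the structurally identical node
+//   SDNode *N = /* allocate the new node */;
+//   CSEMap.InsertNode(N, IP);                // publish it for later lookups
+//
+// Nodes whose last result type is MVT::Glue are deliberately never memoized,
+// which is why the glue case below builds the node without touching CSEMap.
+// --- end editor's note ---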
+ SDNode *N; + unsigned NumOps = Ops.size(); + if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + return SDValue(E, 0); + + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops); + } + CSEMap.InsertNode(N, IP); + } else { + if (NumOps == 1) { + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); + } else if (NumOps == 2) { + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); + } else if (NumOps == 3) { + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); + } else { + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops); + } + } + InsertNode(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { + return getNode(Opcode, DL, VTList, None); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + SDValue N1) { + SDValue Ops[] = { N1 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + SDValue N1, SDValue N2) { + SDValue Ops[] = { N1, N2 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3) { + SDValue Ops[] = { N1, N2, N3 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4) { + SDValue Ops[] = { N1, N2, N3, N4 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, + SDValue N1, SDValue N2, SDValue N3, + SDValue N4, SDValue N5) { + SDValue Ops[] = { N1, N2, N3, N4, N5 }; + return getNode(Opcode, DL, VTList, Ops); +} + +SDVTList SelectionDAG::getVTList(EVT VT) { + return makeVTList(SDNode::getValueTypeList(VT), 1); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) 
SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + +SDVTList SelectionDAG::getVTList(ArrayRef<EVT> VTs) { + unsigned NumVTs = VTs.size(); + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); + } + + void *IP = nullptr; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (!Result) { + EVT *Array = Allocator.Allocate<EVT>(NumVTs); + std::copy(VTs.begin(), VTs.end(), Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); +} + + +/// UpdateNodeOperands - *Mutate* the specified node in-place to have the +/// specified operands. If the resultant node already exists in the DAG, +/// this does not modify the specified node, instead it returns the node that +/// already exists. If the resultant node does not exist in the DAG, the +/// input node is returned. As a degenerate case, if you specify the same +/// input operands as the node already has, the input node is returned. +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { + assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op == N->getOperand(0)) return N; + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + N->OperandList[0].set(Op); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { + assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); + + // Check to see if there is no change. + if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) + return N; // No operands changed, just return the input node. + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + if (N->OperandList[0] != Op1) + N->OperandList[0].set(Op1); + if (N->OperandList[1] != Op2) + N->OperandList[1].set(Op2); + + // If this gets put into a CSE map, add it. 
+ if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { + SDValue Ops[] = { Op1, Op2, Op3 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4) { + SDValue Ops[] = { Op1, Op2, Op3, Op4 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, + SDValue Op3, SDValue Op4, SDValue Op5) { + SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; + return UpdateNodeOperands(N, Ops); +} + +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); + assert(N->getNumOperands() == NumOps && + "Update with wrong number of operands"); + + // If no operands changed just return the input node. + if (std::equal(Ops.begin(), Ops.end(), N->op_begin())) + return N; + + // See if the modified node already exists. + void *InsertPos = nullptr; + if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, InsertPos)) + return Existing; + + // Nope it doesn't. Remove the node from its current place in the maps. + if (InsertPos) + if (!RemoveNodeFromCSEMaps(N)) + InsertPos = nullptr; + + // Now we update the operands. + for (unsigned i = 0; i != NumOps; ++i) + if (N->OperandList[i] != Ops[i]) + N->OperandList[i].set(Ops[i]); + + // If this gets put into a CSE map, add it. + if (InsertPos) CSEMap.InsertNode(N, InsertPos); + return N; +} + +/// DropOperands - Release the operands and set this node to have +/// zero operands. +void SDNode::DropOperands() { + // Unlike the code in MorphNodeTo that does this, we don't need to + // watch for dead nodes here. + for (op_iterator I = op_begin(), E = op_end(); I != E; ) { + SDUse &Use = *I++; + Use.set(SDValue()); + } +} + +/// SelectNodeTo - These are wrappers around MorphNodeTo that accept a +/// machine opcode. 
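+// --- Editor's note: illustrative sketch, not part of the upstream source ---
+// The SelectNodeTo wrappers that follow all funnel into the SDVTList overload,
+// which morphs N in place through MorphNodeTo (note the bitwise-complemented
+// opcode, the convention used here to mark target machine opcodes) and then
+// resets the NodeId to -1, presumably so later bookkeeping treats the node as
+// fresh.  A hedged sketch of a target's Select() hook using one of them
+// (CurDAG is the selector's SelectionDAG; the machine opcode name is
+// hypothetical):
+//
+//   CurDAG->SelectNodeTo(N, TargetOpc::ADDrr, MVT::i32,
+//                        N->getOperand(0), N->getOperand(1));
+// --- end editor's note ---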
+/// +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, None); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT, ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return SelectNodeTo(N, MachineOpc, VTs, None); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, EVT VT4, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, + SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return SelectNodeTo(N, MachineOpc, VTs, Ops); +} + +SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, + SDVTList VTs,ArrayRef<SDValue> Ops) { + N = MorphNodeTo(N, ~MachineOpc, VTs, Ops); + // Reset the NodeID to -1. + N->setNodeId(-1); + return N; +} + +/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// the line number information on the merged node since it is not possible to +/// preserve the information that operation is associated with multiple lines. +/// This will make the debugger working better at -O0, were there is a higher +/// probability having other instructions associated with that line. 
+/// +/// For IROrder, we keep the smaller of the two +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { + DebugLoc NLoc = N->getDebugLoc(); + if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) { + N->setDebugLoc(DebugLoc()); + } + unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); + N->setIROrder(Order); + return N; +} + +/// MorphNodeTo - This *mutates* the specified node to have the specified +/// return type, opcode, and operands. +/// +/// Note that MorphNodeTo returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. Note that the SDLoc need not be the same. +/// +/// Using MorphNodeTo is faster than creating a new node and swapping it in +/// with ReplaceAllUsesWith both because it often avoids allocating a new +/// node, and because it doesn't require CSE recalculation for any of +/// the node's users. +/// +/// However, note that MorphNodeTo recursively deletes dead nodes from the DAG. +/// As a consequence it isn't appropriate to use from within the DAG combiner or +/// the legalizer which maintain worklists that would need to be updated when +/// deleting things. +SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, + SDVTList VTs, ArrayRef<SDValue> Ops) { + unsigned NumOps = Ops.size(); + // If an identical node already exists, use it. + void *IP = nullptr; + if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opc, VTs, Ops); + if (SDNode *ON = FindNodeOrInsertPos(ID, N->getDebugLoc(), IP)) + return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); + } + + if (!RemoveNodeFromCSEMaps(N)) + IP = nullptr; + + // Start the morphing. + N->NodeType = Opc; + N->ValueList = VTs.VTs; + N->NumValues = VTs.NumVTs; + + // Clear the operands list, updating used nodes to remove this from their + // use list. Keep track of any operands that become dead as a result. + SmallPtrSet<SDNode*, 16> DeadNodeSet; + for (SDNode::op_iterator I = N->op_begin(), E = N->op_end(); I != E; ) { + SDUse &Use = *I++; + SDNode *Used = Use.getNode(); + Use.set(SDValue()); + if (Used->use_empty()) + DeadNodeSet.insert(Used); + } + + if (MachineSDNode *MN = dyn_cast<MachineSDNode>(N)) { + // Initialize the memory references information. + MN->setMemRefs(nullptr, nullptr); + // If NumOps is larger than the # of operands we can have in a + // MachineSDNode, reallocate the operand list. + if (NumOps > MN->NumOperands || !MN->OperandsNeedDelete) { + if (MN->OperandsNeedDelete) + delete[] MN->OperandList; + if (NumOps > array_lengthof(MN->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops.data(), NumOps); + else + MN->InitOperands(MN->LocalOperands, Ops.data(), NumOps); + MN->OperandsNeedDelete = false; + } else + MN->InitOperands(MN->OperandList, Ops.data(), NumOps); + } else { + // If NumOps is larger than the # of operands we currently have, reallocate + // the operand list. 
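+// --- Editor's note: descriptive comment, not part of the upstream source ---
+// The reallocation below mirrors the MachineSDNode case handled just above:
+// the node first tries to reuse its existing OperandList (or, for a
+// MachineSDNode, the small inline LocalOperands buffer) and only allocates
+// fresh SDUse storage when the new operand count is larger.  OperandsNeedDelete
+// records whether OperandList came from new[] and must therefore be freed
+// before being replaced; storage taken from OperandAllocator is pool-allocated
+// with no recycling metadata, since a morphed node lives unmorphed for the
+// remainder of the current SelectionDAG iteration.
+// --- end editor's note ---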
+ if (NumOps > N->NumOperands) { + if (N->OperandsNeedDelete) + delete[] N->OperandList; + N->InitOperands(new SDUse[NumOps], Ops.data(), NumOps); + N->OperandsNeedDelete = true; + } else + N->InitOperands(N->OperandList, Ops.data(), NumOps); + } + + // Delete any nodes that are still dead after adding the uses for the + // new operands. + if (!DeadNodeSet.empty()) { + SmallVector<SDNode *, 16> DeadNodes; + for (SDNode *N : DeadNodeSet) + if (N->use_empty()) + DeadNodes.push_back(N); + RemoveDeadNodes(DeadNodes); + } + + if (IP) + CSEMap.InsertNode(N, IP); // Memoize the new node. + return N; +} + + +/// getMachineNode - These are used for target selectors to create a new node +/// with specified return type(s), MachineInstr opcode, and operands. +/// +/// Note that getMachineNode returns the resultant node. If there is already a +/// node of the specified opcode and operands, it returns that node instead of +/// the current one. +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, None); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, None); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, SDValue Op1) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, SDValue Op1, + SDValue Op2, SDValue Op3) { + SDVTList VTs = getVTList(VT1, VT2); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, EVT VT3, + SDValue Op1, SDValue Op2, SDValue Op3) { + SDVTList VTs = 
getVTList(VT1, VT2, VT3); + SDValue Ops[] = { Op1, Op2, Op3 }; + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + EVT VT1, EVT VT2, EVT VT3, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, + EVT VT2, EVT VT3, EVT VT4, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, + ArrayRef<EVT> ResultTys, + ArrayRef<SDValue> Ops) { + SDVTList VTs = getVTList(ResultTys); + return getMachineNode(Opcode, dl, VTs, Ops); +} + +MachineSDNode * +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, + ArrayRef<SDValue> OpsArray) { + bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; + MachineSDNode *N; + void *IP = nullptr; + const SDValue *Ops = OpsArray.data(); + unsigned NumOps = OpsArray.size(); + + if (DoCSE) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, ~Opcode, VTs, OpsArray); + IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL)); + } + } + + // Allocate a new MachineSDNode. + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); + + // Initialize the operands list. + if (NumOps > array_lengthof(N->LocalOperands)) + // We're creating a final node that will live unmorphed for the + // remainder of the current SelectionDAG iteration, so we can allocate + // the operands directly out of a pool with no recycling metadata. + N->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), + Ops, NumOps); + else + N->InitOperands(N->LocalOperands, Ops, NumOps); + N->OperandsNeedDelete = false; + + if (DoCSE) + CSEMap.InsertNode(N, IP); + + InsertNode(N); + return N; +} + +/// getTargetExtractSubreg - A convenience function for creating +/// TargetOpcode::EXTRACT_SUBREG nodes. +SDValue +SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, + SDValue Operand) { + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); + SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, + VT, Operand, SRIdxVal); + return SDValue(Subreg, 0); +} + +/// getTargetInsertSubreg - A convenience function for creating +/// TargetOpcode::INSERT_SUBREG nodes. +SDValue +SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, + SDValue Operand, SDValue Subreg) { + SDValue SRIdxVal = getTargetConstant(SRIdx, DL, MVT::i32); + SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, + VT, Operand, Subreg, SRIdxVal); + return SDValue(Result, 0); +} + +/// getNodeIfExists - Get the specified node if it's already available, or +/// else return NULL. +SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, + ArrayRef<SDValue> Ops, + const SDNodeFlags *Flags) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { + FoldingSetNodeID ID; + AddNodeIDNode(ID, Opcode, VTList, Ops); + AddNodeIDFlags(ID, Opcode, Flags); + void *IP = nullptr; + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) + return E; + } + return nullptr; +} + +/// getDbgValue - Creates a SDDbgValue node. 
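+// --- Editor's note: illustrative sketch, not part of the upstream source ---
+// getMachineNode above mirrors getNode's memoization but hashes the
+// complemented opcode and skips CSE when the last result type is glue, while
+// getTargetExtractSubreg / getTargetInsertSubreg wrap EXTRACT_SUBREG /
+// INSERT_SUBREG with the sub-register index emitted as a target constant.
+// A hedged sketch (the sub-register index name is hypothetical; DAG, DL,
+// Wide and Lo are assumed to be in scope):
+//
+//   SDValue Lo    = DAG.getTargetExtractSubreg(TargetNS::sub_lo, DL,
+//                                              MVT::i32, Wide);
+//   SDValue Whole = DAG.getTargetInsertSubreg(TargetNS::sub_lo, DL,
+//                                             MVT::i64, Wide, Lo);
+// --- end editor's note ---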
+/// +/// SDNode +SDDbgValue *SelectionDAG::getDbgValue(MDNode *Var, MDNode *Expr, SDNode *N, + unsigned R, bool IsIndirect, uint64_t Off, + DebugLoc DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) + SDDbgValue(Var, Expr, N, R, IsIndirect, Off, DL, O); +} + +/// Constant +SDDbgValue *SelectionDAG::getConstantDbgValue(MDNode *Var, MDNode *Expr, + const Value *C, uint64_t Off, + DebugLoc DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, C, Off, DL, O); +} + +/// FrameIndex +SDDbgValue *SelectionDAG::getFrameIndexDbgValue(MDNode *Var, MDNode *Expr, + unsigned FI, uint64_t Off, + DebugLoc DL, unsigned O) { + assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + return new (DbgInfo->getAlloc()) SDDbgValue(Var, Expr, FI, Off, DL, O); +} + +namespace { + +/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node +/// pointed to by a use iterator is deleted, increment the use iterator +/// so that it doesn't dangle. +/// +class RAUWUpdateListener : public SelectionDAG::DAGUpdateListener { + SDNode::use_iterator &UI; + SDNode::use_iterator &UE; + + void NodeDeleted(SDNode *N, SDNode *E) override { + // Increment the iterator as needed. + while (UI != UE && N == *UI) + ++UI; + } + +public: + RAUWUpdateListener(SelectionDAG &d, + SDNode::use_iterator &ui, + SDNode::use_iterator &ue) + : SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {} +}; + +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes From has a single result value. +/// +void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) { + SDNode *From = FromN.getNode(); + assert(From->getNumValues() == 1 && FromN.getResNo() == 0 && + "Cannot replace with this method!"); + assert(From != To.getNode() && "Cannot replace uses of with self"); + + // Iterate over all the existing uses of From. New uses will be added + // to the beginning of the use list, which we avoid visiting. + // This specifically avoids visiting uses of From that arise while the + // replacement is happening, because any such uses would be the result + // of CSE: If an existing node looks like From after one of its operands + // is replaced by To, we don't want to replace of all its users with To + // too. See PR3018 for more info. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. 
+ if (FromN == getRoot()) + setRoot(To); +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version assumes that for each value of From, there is a +/// corresponding value in To in the same position with the same type. +/// +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To) { +#ifndef NDEBUG + for (unsigned i = 0, e = From->getNumValues(); i != e; ++i) + assert((!From->hasAnyUseOfValue(i) || + From->getValueType(i) == To->getValueType(i)) && + "Cannot use this version of ReplaceAllUsesWith!"); +#endif + + // Handle the trivial case. + if (From == To) + return; + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + ++UI; + Use.setNode(To); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); +} + +/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. +/// This can cause recursive merging of nodes in the DAG. +/// +/// This version can replace From with any result values. To must match the +/// number and types of values returned by From. +void SelectionDAG::ReplaceAllUsesWith(SDNode *From, const SDValue *To) { + if (From->getNumValues() == 1) // Handle the simple case efficiently. + return ReplaceAllUsesWith(SDValue(From, 0), To[0]); + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From->use_begin(), UE = From->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + const SDValue &ToOp = To[Use.getResNo()]; + ++UI; + Use.set(ToOp); + } while (UI != UE && *UI == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); +} + +/// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The Deleted +/// vector is handled the same way as for ReplaceAllUsesWith. 
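+// --- Editor's note: illustrative sketch, not part of the upstream source ---
+// All of the ReplaceAllUsesWith overloads above (and ReplaceAllUsesOfValueWith
+// below) visit only the uses that already exist and rely on RAUWUpdateListener
+// to keep the use iterator valid while nodes are CSE'd away.  External code
+// can hook the same notification mechanism; a minimal sketch, assuming DAG is
+// a SelectionDAG& and OldVal / NewVal are SDValues in scope:
+//
+//   struct PruneListener : SelectionDAG::DAGUpdateListener {
+//     PruneListener(SelectionDAG &D) : SelectionDAG::DAGUpdateListener(D) {}
+//     void NodeDeleted(SDNode *N, SDNode * /*Replacement*/) override {
+//       // e.g. drop any cached pointers to N here
+//     }
+//   };
+//   PruneListener PL(DAG);   // stays registered for PL's lifetime
+//   DAG.ReplaceAllUsesWith(OldVal, NewVal);
+// --- end editor's note ---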
+void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){ + // Handle the really simple, really trivial case efficiently. + if (From == To) return; + + // Handle the simple, trivial, case efficiently. + if (From.getNode()->getNumValues() == 1) { + ReplaceAllUsesWith(From, To); + return; + } + + // Iterate over just the existing users of From. See the comments in + // the ReplaceAllUsesWith above. + SDNode::use_iterator UI = From.getNode()->use_begin(), + UE = From.getNode()->use_end(); + RAUWUpdateListener Listener(*this, UI, UE); + while (UI != UE) { + SDNode *User = *UI; + bool UserRemovedFromCSEMaps = false; + + // A user can appear in a use list multiple times, and when this + // happens the uses are usually next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + SDUse &Use = UI.getUse(); + + // Skip uses of different values from the same node. + if (Use.getResNo() != From.getResNo()) { + ++UI; + continue; + } + + // If this node hasn't been modified yet, it's still in the CSE maps, + // so remove its old self from the CSE maps. + if (!UserRemovedFromCSEMaps) { + RemoveNodeFromCSEMaps(User); + UserRemovedFromCSEMaps = true; + } + + ++UI; + Use.set(To); + } while (UI != UE && *UI == User); + + // We are iterating over all uses of the From node, so if a use + // doesn't use the specific value, no changes are made. + if (!UserRemovedFromCSEMaps) + continue; + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); +} + +namespace { + /// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith + /// to record information about a use. + struct UseMemo { + SDNode *User; + unsigned Index; + SDUse *Use; + }; + + /// operator< - Sort Memos by User. + bool operator<(const UseMemo &L, const UseMemo &R) { + return (intptr_t)L.User < (intptr_t)R.User; + } +} + +/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving +/// uses of other values produced by From.getNode() alone. The same value +/// may appear in both the From and To list. The Deleted vector is +/// handled the same way as for ReplaceAllUsesWith. +void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From, + const SDValue *To, + unsigned Num){ + // Handle the simple, trivial case efficiently. + if (Num == 1) + return ReplaceAllUsesOfValueWith(*From, *To); + + // Read up all the uses and make records of them. This helps + // processing new uses that are introduced during the + // replacement process. + SmallVector<UseMemo, 4> Uses; + for (unsigned i = 0; i != Num; ++i) { + unsigned FromResNo = From[i].getResNo(); + SDNode *FromNode = From[i].getNode(); + for (SDNode::use_iterator UI = FromNode->use_begin(), + E = FromNode->use_end(); UI != E; ++UI) { + SDUse &Use = UI.getUse(); + if (Use.getResNo() == FromResNo) { + UseMemo Memo = { *UI, i, &Use }; + Uses.push_back(Memo); + } + } + } + + // Sort the uses, so that all the uses from a given User are together. + std::sort(Uses.begin(), Uses.end()); + + for (unsigned UseIndex = 0, UseIndexEnd = Uses.size(); + UseIndex != UseIndexEnd; ) { + // We know that this user uses some value of From. If it is the right + // value, update it. 
+ SDNode *User = Uses[UseIndex].User; + + // This node is about to morph, remove its old self from the CSE maps. + RemoveNodeFromCSEMaps(User); + + // The Uses array is sorted, so all the uses for a given User + // are next to each other in the list. + // To help reduce the number of CSE recomputations, process all + // the uses of this user that we can find this way. + do { + unsigned i = Uses[UseIndex].Index; + SDUse &Use = *Uses[UseIndex].Use; + ++UseIndex; + + Use.set(To[i]); + } while (UseIndex != UseIndexEnd && Uses[UseIndex].User == User); + + // Now that we have modified User, add it back to the CSE maps. If it + // already exists there, recursively merge the results together. + AddModifiedNodeToCSEMaps(User); + } +} + +/// AssignTopologicalOrder - Assign a unique node id for each node in the DAG +/// based on their topological order. It returns the maximum id and a vector +/// of the SDNodes* in assigned order by reference. +unsigned SelectionDAG::AssignTopologicalOrder() { + + unsigned DAGSize = 0; + + // SortedPos tracks the progress of the algorithm. Nodes before it are + // sorted, nodes after it are unsorted. When the algorithm completes + // it is at the end of the list. + allnodes_iterator SortedPos = allnodes_begin(); + + // Visit all the nodes. Move nodes with no operands to the front of + // the list immediately. Annotate nodes that do have operands with their + // operand count. Before we do this, the Node Id fields of the nodes + // may contain arbitrary values. After, the Node Id fields for nodes + // before SortedPos will contain the topological sort index, and the + // Node Id fields for nodes At SortedPos and after will contain the + // count of outstanding operands. + for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { + SDNode *N = &*I++; + checkForCycles(N, this); + unsigned Degree = N->getNumOperands(); + if (Degree == 0) { + // A node with no uses, add it to the result array immediately. + N->setNodeId(DAGSize++); + allnodes_iterator Q(N); + if (Q != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(Q)); + assert(SortedPos != AllNodes.end() && "Overran node list"); + ++SortedPos; + } else { + // Temporarily use the Node Id as scratch space for the degree count. + N->setNodeId(Degree); + } + } + + // Visit all the nodes. As we iterate, move nodes into sorted order, + // such that by the time the end is reached all nodes will be sorted. + for (SDNode &Node : allnodes()) { + SDNode *N = &Node; + checkForCycles(N, this); + // N is in sorted position, so all its uses have one less operand + // that needs to be sorted. + for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); + UI != UE; ++UI) { + SDNode *P = *UI; + unsigned Degree = P->getNodeId(); + assert(Degree != 0 && "Invalid node degree"); + --Degree; + if (Degree == 0) { + // All of P's operands are sorted, so P may sorted now. + P->setNodeId(DAGSize++); + if (P != SortedPos) + SortedPos = AllNodes.insert(SortedPos, AllNodes.remove(P)); + assert(SortedPos != AllNodes.end() && "Overran node list"); + ++SortedPos; + } else { + // Update P's outstanding operand count. 
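+// --- Editor's note: comparison sketch, not part of the upstream source ---
+// AssignTopologicalOrder is Kahn's algorithm run in place on the AllNodes
+// list: a node's NodeId temporarily holds the number of its operands that
+// have not been ordered yet, and each time a node is placed every user's
+// count is decremented here.  The same idea on a plain adjacency-list DAG,
+// for comparison (Succs[v] lists v's users, InDeg[v] is v's operand count;
+// uses std::vector and std::queue):
+//
+//   std::vector<int> topoSort(const std::vector<std::vector<int>> &Succs,
+//                             std::vector<int> InDeg) {
+//     std::vector<int> Order;
+//     std::queue<int> Q;
+//     for (int V = 0; V != (int)InDeg.size(); ++V)
+//       if (InDeg[V] == 0) Q.push(V);              // operand-less nodes first
+//     while (!Q.empty()) {
+//       int V = Q.front(); Q.pop();
+//       Order.push_back(V);
+//       for (int S : Succs[V])
+//         if (--InDeg[S] == 0) Q.push(S);          // all of S's operands placed
+//     }
+//     return Order;
+//   }
+// --- end editor's note ---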
+ P->setNodeId(Degree); + } + } + if (&Node == SortedPos) { +#ifndef NDEBUG + allnodes_iterator I(N); + SDNode *S = &*++I; + dbgs() << "Overran sorted position:\n"; + S->dumprFull(this); dbgs() << "\n"; + dbgs() << "Checking if this is due to cycles\n"; + checkForCycles(this, true); +#endif + llvm_unreachable(nullptr); + } + } + + assert(SortedPos == AllNodes.end() && + "Topological sort incomplete!"); + assert(AllNodes.front().getOpcode() == ISD::EntryToken && + "First node in topological sort is not the entry token!"); + assert(AllNodes.front().getNodeId() == 0 && + "First node in topological sort has non-zero id!"); + assert(AllNodes.front().getNumOperands() == 0 && + "First node in topological sort has operands!"); + assert(AllNodes.back().getNodeId() == (int)DAGSize-1 && + "Last node in topologic sort has unexpected id!"); + assert(AllNodes.back().use_empty() && + "Last node in topologic sort has users!"); + assert(DAGSize == allnodes_size() && "Node count mismatch!"); + return DAGSize; +} + +/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the +/// value is produced by SD. +void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { + if (SD) { + assert(DbgInfo->getSDDbgValues(SD).empty() || SD->getHasDebugValue()); + SD->setHasDebugValue(true); + } + DbgInfo->add(DB, SD, isParameter); +} + +/// TransferDbgValues - Transfer SDDbgValues. +void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { + if (From == To || !From.getNode()->getHasDebugValue()) + return; + SDNode *FromNode = From.getNode(); + SDNode *ToNode = To.getNode(); + ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode); + SmallVector<SDDbgValue *, 2> ClonedDVs; + for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end(); + I != E; ++I) { + SDDbgValue *Dbg = *I; + if (Dbg->getKind() == SDDbgValue::SDNODE) { + SDDbgValue *Clone = + getDbgValue(Dbg->getVariable(), Dbg->getExpression(), ToNode, + To.getResNo(), Dbg->isIndirect(), Dbg->getOffset(), + Dbg->getDebugLoc(), Dbg->getOrder()); + ClonedDVs.push_back(Clone); + } + } + for (SmallVectorImpl<SDDbgValue *>::iterator I = ClonedDVs.begin(), + E = ClonedDVs.end(); I != E; ++I) + AddDbgValue(*I, ToNode, false); +} + +//===----------------------------------------------------------------------===// +// SDNode Class +//===----------------------------------------------------------------------===// + +bool llvm::isNullConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isNullValue(); +} + +bool llvm::isNullFPConstant(SDValue V) { + ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(V); + return Const != nullptr && Const->isZero() && !Const->isNegative(); +} + +bool llvm::isAllOnesConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isAllOnesValue(); +} + +bool llvm::isOneConstant(SDValue V) { + ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); + return Const != nullptr && Const->isOne(); +} + +HandleSDNode::~HandleSDNode() { + DropOperands(); +} + +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, + DebugLoc DL, const GlobalValue *GA, + EVT VT, int64_t o, unsigned char TF) + : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + TheGlobal = GA; +} + +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + 
SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(isNonTemporal() == MMO->isNonTemporal() && + "Non-temporal encoding error!"); + // We check here that the size of the memory operand fits within the size of + // the MMO. This is because the MMO might indicate only a possible address + // range instead of specifying the affected memory addresses precisely. + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); +} + +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + ArrayRef<SDValue> Ops, EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs, Ops), + MemoryVT(memvt), MMO(mmo) { + SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), + MMO->isNonTemporal(), MMO->isInvariant()); + assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); + assert(memvt.getStoreSize() <= MMO->getSize() && "Size mismatch!"); +} + +/// Profile - Gather unique data for the node. +/// +void SDNode::Profile(FoldingSetNodeID &ID) const { + AddNodeIDNode(ID, this); +} + +namespace { + struct EVTArray { + std::vector<EVT> VTs; + + EVTArray() { + VTs.reserve(MVT::LAST_VALUETYPE); + for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i) + VTs.push_back(MVT((MVT::SimpleValueType)i)); + } + }; +} + +static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs; +static ManagedStatic<EVTArray> SimpleVTArray; +static ManagedStatic<sys::SmartMutex<true> > VTMutex; + +/// getValueTypeList - Return a pointer to the specified value type. +/// +const EVT *SDNode::getValueTypeList(EVT VT) { + if (VT.isExtended()) { + sys::SmartScopedLock<true> Lock(*VTMutex); + return &(*EVTs->insert(VT).first); + } else { + assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE && + "Value type out of range!"); + return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy]; + } +} + +/// hasNUsesOfValue - Return true if there are exactly NUSES uses of the +/// indicated value. This method ignores uses of other values defined by this +/// operation. +bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + // TODO: Only iterate over uses of a given value of the node + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) { + if (UI.getUse().getResNo() == Value) { + if (NUses == 0) + return false; + --NUses; + } + } + + // Found exactly the right number of uses? + return NUses == 0; +} + + +/// hasAnyUseOfValue - Return true if there are any use of the indicated +/// value. This method ignores uses of other values defined by this operation. +bool SDNode::hasAnyUseOfValue(unsigned Value) const { + assert(Value < getNumValues() && "Bad value!"); + + for (SDNode::use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) + if (UI.getUse().getResNo() == Value) + return true; + + return false; +} + + +/// isOnlyUserOf - Return true if this node is the only use of N. 
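+// --- Editor's note: illustrative sketch, not part of the upstream source ---
+// hasNUsesOfValue / hasAnyUseOfValue above count uses of one specific result
+// number by scanning the node's whole use list (there is no per-result use
+// list, as the TODO notes).  A combiner-style check might look like this
+// (N is an SDNode* assumed to produce at least two results):
+//
+//   if (N->hasNUsesOfValue(1, 0) && !N->hasAnyUseOfValue(1)) {
+//     // result 0 has exactly one user and result 1 is dead
+//   }
+// --- end editor's note ---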
+/// +bool SDNode::isOnlyUserOf(const SDNode *N) const { + bool Seen = false; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDNode *User = *I; + if (User == this) + Seen = true; + else + return false; + } + + return Seen; +} + +/// isOperand - Return true if this node is an operand of N. +/// +bool SDValue::isOperandOf(const SDNode *N) const { + for (const SDValue &Op : N->op_values()) + if (*this == Op) + return true; + return false; +} + +bool SDNode::isOperandOf(const SDNode *N) const { + for (const SDValue &Op : N->op_values()) + if (this == Op.getNode()) + return true; + return false; +} + +/// reachesChainWithoutSideEffects - Return true if this operand (which must +/// be a chain) reaches the specified operand without crossing any +/// side-effecting instructions on any chain path. In practice, this looks +/// through token factors and non-volatile loads. In order to remain efficient, +/// this only looks a couple of nodes in, it does not do an exhaustive search. +bool SDValue::reachesChainWithoutSideEffects(SDValue Dest, + unsigned Depth) const { + if (*this == Dest) return true; + + // Don't search too deeply, we just want to be able to see through + // TokenFactor's etc. + if (Depth == 0) return false; + + // If this is a token factor, all inputs to the TF happen in parallel. If any + // of the operands of the TF does not reach dest, then we cannot do the xform. + if (getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) + if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1)) + return false; + return true; + } + + // Loads don't have side effects, look through them. + if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) { + if (!Ld->isVolatile()) + return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1); + } + return false; +} + +/// hasPredecessor - Return true if N is a predecessor of this node. +/// N is either an operand of this node, or can be reached by recursively +/// traversing up the operands. +/// NOTE: This is an expensive method. Use it carefully. +bool SDNode::hasPredecessor(const SDNode *N) const { + SmallPtrSet<const SDNode *, 32> Visited; + SmallVector<const SDNode *, 16> Worklist; + return hasPredecessorHelper(N, Visited, Worklist); +} + +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSetImpl<const SDNode *> &Visited, + SmallVectorImpl<const SDNode *> &Worklist) const { + if (Visited.empty()) { + Worklist.push_back(this); + } else { + // Take a look in the visited set. If we've already encountered this node + // we needn't search further. + if (Visited.count(N)) + return true; + } + + // Haven't visited N yet. Continue the search. 
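+// --- Editor's note: descriptive comment, not part of the upstream source ---
+// hasPredecessorHelper does a depth-first walk over operands using the
+// caller-supplied Visited set and Worklist.  Because both are passed by
+// reference, a caller can keep them alive and test several candidate nodes
+// against the same, gradually growing, search state instead of re-walking the
+// DAG each time, which is presumably why the helper is exposed separately
+// from the (expensive) hasPredecessor wrapper above.  When Visited is
+// non-empty, a hit in the set answers the query immediately; otherwise the
+// search simply resumes from whatever is still on the Worklist.
+// --- end editor's note ---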
+ while (!Worklist.empty()) { + const SDNode *M = Worklist.pop_back_val(); + for (const SDValue &OpV : M->op_values()) { + SDNode *Op = OpV.getNode(); + if (Visited.insert(Op).second) + Worklist.push_back(Op); + if (Op == N) + return true; + } + } + + return false; +} + +uint64_t SDNode::getConstantOperandVal(unsigned Num) const { + assert(Num < NumOperands && "Invalid child # of SDNode!"); + return cast<ConstantSDNode>(OperandList[Num])->getZExtValue(); +} + +const SDNodeFlags *SDNode::getFlags() const { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + return &FlagsNode->Flags; + return nullptr; +} + +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + SDLoc dl(N); + + SmallVector<SDValue, 8> Scalars; + SmallVector<SDValue, 4> Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = + getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, + getConstant(i, dl, TLI->getVectorIdxTy(getDataLayout()))); + } else { + // A scalar operand; just use it as is. + Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: { + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands, + N->getFlags())); + break; + } + case ISD::VSELECT: + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT, Operands)); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); + break; + case ISD::SIGN_EXTEND_INREG: + case ISD::FP_ROUND_INREG: { + EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType(); + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + Operands[0], + getValueType(ExtVT))); + } + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + return getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars); +} + + +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. 
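+// --- Editor's note: worked example, not part of the upstream source ---
+// For the check below: both loads must share a chain and load exactly 'Bytes'
+// bytes.  If BaseLoc is some pointer X and Loc is (X + 8), then with
+// Bytes = 8 and Dist = 1 the add-with-constant branch matches, because the
+// constant offset 8 equals Dist * Bytes; with Dist = 2 it would require an
+// offset of 16.  The frame-index branch plays the same game using the two
+// stack objects' sizes and offsets instead of an explicit add.
+// --- end editor's note ---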
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist) const { + if (LD->getChain() != Base->getChain()) + return false; + EVT VT = LD->getValueType(0); + if (VT.getSizeInBits() / 8 != Bytes) + return false; + + SDValue Loc = LD->getOperand(1); + SDValue BaseLoc = Base->getOperand(1); + if (Loc.getOpcode() == ISD::FrameIndex) { + if (BaseLoc.getOpcode() != ISD::FrameIndex) + return false; + const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo(); + int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); + int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); + int FS = MFI->getObjectSize(FI); + int BFS = MFI->getObjectSize(BFI); + if (FS != BFS || FS != (int)Bytes) return false; + return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); + } + + // Handle X + C. + if (isBaseWithConstantOffset(Loc)) { + int64_t LocOffset = cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc) { + // If the base location is a simple address with no offset itself, then + // the second load's first add operand should be the base address. + if (LocOffset == Dist * (int)Bytes) + return true; + } else if (isBaseWithConstantOffset(BaseLoc)) { + // The base location itself has an offset, so subtract that value from the + // second load's offset before comparing to distance * size. + int64_t BOffset = + cast<ConstantSDNode>(BaseLoc.getOperand(1))->getSExtValue(); + if (Loc.getOperand(0) == BaseLoc.getOperand(0)) { + if ((LocOffset - BOffset) == Dist * (int)Bytes) + return true; + } + } + } + const GlobalValue *GV1 = nullptr; + const GlobalValue *GV2 = nullptr; + int64_t Offset1 = 0; + int64_t Offset2 = 0; + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + if (isGA1 && isGA2 && GV1 == GV2) + return Offset1 == (Offset2 + Dist*Bytes); + return false; +} + + +/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if +/// it cannot be inferred. +unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { + // If this is a GlobalAddress + cst, return the alignment. + const GlobalValue *GV; + int64_t GVOffset = 0; + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = getDataLayout().getPointerTypeSizeInBits(GV->getType()); + APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); + llvm::computeKnownBits(const_cast<GlobalValue *>(GV), KnownZero, KnownOne, + getDataLayout()); + unsigned AlignBits = KnownZero.countTrailingOnes(); + unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; + if (Align) + return MinAlign(Align, GVOffset); + } + + // If this is a direct reference to a stack slot, use information about the + // stack slot's alignment. 
+ int FrameIdx = 1 << 31; + int64_t FrameOffset = 0; + if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) { + FrameIdx = FI->getIndex(); + } else if (isBaseWithConstantOffset(Ptr) && + isa<FrameIndexSDNode>(Ptr.getOperand(0))) { + // Handle FI+Cst + FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); + FrameOffset = Ptr.getConstantOperandVal(1); + } + + if (FrameIdx != (1 << 31)) { + const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); + unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), + FrameOffset); + return FIInfoAlign; + } + + return 0; +} + +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. +std::pair<EVT, EVT> SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair<SDValue, SDValue> +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout()))); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), DL, + TLI->getVectorIdxTy(getDataLayout()))); + return std::make_pair(Lo, Hi); +} + +void SelectionDAG::ExtractVectorElements(SDValue Op, + SmallVectorImpl<SDValue> &Args, + unsigned Start, unsigned Count) { + EVT VT = Op.getValueType(); + if (Count == 0) + Count = VT.getVectorNumElements(); + + EVT EltVT = VT.getVectorElementType(); + EVT IdxTy = TLI->getVectorIdxTy(getDataLayout()); + SDLoc SL(Op); + for (unsigned i = Start, e = Start + Count; i != e; ++i) { + Args.push_back(getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, + Op, getConstant(i, SL, IdxTy))); + } +} + +// getAddressSpace - Return the address space this GlobalAddress belongs to. +unsigned GlobalAddressSDNode::getAddressSpace() const { + return getGlobal()->getType()->getAddressSpace(); +} + + +Type *ConstantPoolSDNode::getType() const { + if (isMachineConstantPoolEntry()) + return Val.MachineCPVal->getType(); + return Val.ConstVal->getType(); +} + +bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, + APInt &SplatUndef, + unsigned &SplatBitSize, + bool &HasAnyUndefs, + unsigned MinSplatBits, + bool isBigEndian) const { + EVT VT = getValueType(0); + assert(VT.isVector() && "Expected a vector type"); + unsigned sz = VT.getSizeInBits(); + if (MinSplatBits > sz) + return false; + + SplatValue = APInt(sz, 0); + SplatUndef = APInt(sz, 0); + + // Get the bits. Bits with undefined values (when the corresponding element + // of the vector is an ISD::UNDEF value) are set in SplatUndef and cleared + // in SplatValue. If any of the values are not constant, give up and return + // false. 
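The alignment inference above turns "the low N bits of the address are known to be zero" into an alignment of 1 << N, then folds in the constant offset with MinAlign. A small standalone sketch of both steps, assuming nothing beyond plain C++ (minAlign here re-derives the lowest-set-bit trick for illustration; it is not the LLVM helper itself):

#include <cassert>
#include <cstdint>

// If the low AlignBits bits of an address are known to be zero, the pointer is
// aligned to (1 << AlignBits) bytes; cap the shift as the real code does.
static uint64_t alignFromKnownZeroLowBits(unsigned AlignBits) {
  if (AlignBits == 0)
    return 0;                       // nothing known about the address
  unsigned Shift = AlignBits < 31 ? AlignBits : 31;
  return (uint64_t)1 << Shift;
}

// Largest power of two dividing both an alignment and an offset: the lowest
// set bit of (A | B).
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return (A | B) & (1 + ~(A | B));
}

int main() {
  assert(alignFromKnownZeroLowBits(4) == 16);  // low 4 bits zero -> 16-byte aligned
  assert(minAlign(16, 8) == 8);                // 16-byte aligned base, offset 8
  assert(minAlign(16, 4) == 4);
  return 0;
}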
+ unsigned int nOps = getNumOperands(); + assert(nOps > 0 && "isConstantSplat has 0-size build vector"); + unsigned EltBitSize = VT.getVectorElementType().getSizeInBits(); + + for (unsigned j = 0; j < nOps; ++j) { + unsigned i = isBigEndian ? nOps-1-j : j; + SDValue OpVal = getOperand(i); + unsigned BitPos = j * EltBitSize; + + if (OpVal.getOpcode() == ISD::UNDEF) + SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize); + else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) + SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize). + zextOrTrunc(sz) << BitPos; + else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) + SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos; + else + return false; + } + + // The build_vector is all constants or undefs. Find the smallest element + // size that splats the vector. + + HasAnyUndefs = (SplatUndef != 0); + while (sz > 8) { + + unsigned HalfSize = sz / 2; + APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatValue.trunc(HalfSize); + APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize); + APInt LowUndef = SplatUndef.trunc(HalfSize); + + // If the two halves do not match (ignoring undef bits), stop here. + if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) || + MinSplatBits > HalfSize) + break; + + SplatValue = HighValue | LowValue; + SplatUndef = HighUndef & LowUndef; + + sz = HalfSize; + } + + SplatBitSize = sz; + return true; +} + +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SDValue Op = getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + assert(getOperand(0).getOpcode() == ISD::UNDEF && + "Can only have a splat without a constant for all undefs."); + return getOperand(0); + } + + return Splatted; +} + +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements)); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements)); +} + +int32_t +BuildVectorSDNode::getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, + uint32_t BitWidth) const { + if (ConstantFPSDNode *CN = + dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements))) { + bool IsExact; + APSInt IntVal(BitWidth); + APFloat APF = CN->getValueAPF(); + if (APF.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact) != + APFloat::opOK || + !IsExact) + return -1; + + return IntVal.exactLogBase2(); + } + return -1; +} + +bool BuildVectorSDNode::isConstant() const { + for (const SDValue &Op : op_values()) { + unsigned Opc = Op.getOpcode(); + if (Opc != ISD::UNDEF && Opc != ISD::Constant && Opc != ISD::ConstantFP) + return false; + } + return true; +} + +bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { + // Find the first non-undef value in the shuffle mask. 
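The halving loop in isConstantSplat keeps shrinking the candidate element size while the two halves of the bit pattern agree. A standalone sketch of that loop on a plain 64-bit value, ignoring the undef-bit bookkeeping of the real routine (the function name is illustrative only):

#include <cassert>
#include <cstdint>

// Smallest chunk width (in bits, down to 8) such that the 64-bit pattern is
// that chunk repeated: keep halving while the two halves are identical.
static unsigned smallestSplatBits(uint64_t Value) {
  unsigned Bits = 64;
  while (Bits > 8) {
    unsigned Half = Bits / 2;
    uint64_t Mask = (1ull << Half) - 1;
    uint64_t Lo = Value & Mask;
    uint64_t Hi = (Value >> Half) & Mask;
    if (Lo != Hi)
      break;
    Value = Lo;
    Bits = Half;
  }
  return Bits;
}

int main() {
  assert(smallestSplatBits(0xAAAAAAAAAAAAAAAAull) == 8);   // 0xAA repeated
  assert(smallestSplatBits(0x1234123412341234ull) == 16);  // 0x1234 repeated
  assert(smallestSplatBits(0x0123456789ABCDEFull) == 64);  // no smaller splat
  return 0;
}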
+ unsigned i, e; + for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i) + /* search */; + + assert(i != e && "VECTOR_SHUFFLE node with all undef indices!"); + + // Make sure all remaining elements are either undef or the same as the first + // non-undef value. + for (int Idx = Mask[i]; i != e; ++i) + if (Mask[i] >= 0 && Mask[i] != Idx) + return false; + return true; +} + +#ifndef NDEBUG +static void checkForCyclesHelper(const SDNode *N, + SmallPtrSetImpl<const SDNode*> &Visited, + SmallPtrSetImpl<const SDNode*> &Checked, + const llvm::SelectionDAG *DAG) { + // If this node has already been checked, don't check it again. + if (Checked.count(N)) + return; + + // If a node has already been visited on this depth-first walk, reject it as + // a cycle. + if (!Visited.insert(N).second) { + errs() << "Detected cycle in SelectionDAG\n"; + dbgs() << "Offending node:\n"; + N->dumprFull(DAG); dbgs() << "\n"; + abort(); + } + + for (const SDValue &Op : N->op_values()) + checkForCyclesHelper(Op.getNode(), Visited, Checked, DAG); + + Checked.insert(N); + Visited.erase(N); +} +#endif + +void llvm::checkForCycles(const llvm::SDNode *N, + const llvm::SelectionDAG *DAG, + bool force) { +#ifndef NDEBUG + bool check = force; +#ifdef XDEBUG + check = true; +#endif // XDEBUG + if (check) { + assert(N && "Checking nonexistent SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked, DAG); + } +#endif // !NDEBUG +} + +void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { + checkForCycles(DAG->getRoot().getNode(), DAG, force); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp new file mode 100644 index 0000000..e446a93 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -0,0 +1,8613 @@ +//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. 
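checkForCyclesHelper is a standard depth-first cycle check: one set for nodes on the current DFS path, one for nodes whose subgraph is already proven acyclic. A self-contained sketch of the same scheme over a plain adjacency list (the real code aborts and dumps the offending node instead of returning a bool):

#include <cassert>
#include <set>
#include <vector>

// Depth-first cycle check: 'OnPath' holds nodes on the current DFS path,
// 'Checked' holds nodes already shown to reach no cycle.
static bool hasCycleFrom(unsigned N,
                         const std::vector<std::vector<unsigned>> &Succs,
                         std::set<unsigned> &OnPath,
                         std::set<unsigned> &Checked) {
  if (Checked.count(N))
    return false;
  if (!OnPath.insert(N).second)
    return true;                       // revisited a node on the current path
  for (unsigned Succ : Succs[N])
    if (hasCycleFrom(Succ, Succs, OnPath, Checked))
      return true;
  Checked.insert(N);
  OnPath.erase(N);
  return false;
}

int main() {
  std::vector<std::vector<unsigned>> Dag = {{1, 2}, {2}, {}};  // 0->1->2, 0->2
  std::vector<std::vector<unsigned>> Cyc = {{1}, {2}, {0}};    // 0->1->2->0
  std::set<unsigned> P, C;
  assert(!hasCycleFrom(0, Dag, P, C));
  P.clear(); C.clear();
  assert(hasCycleFrom(0, Cyc, P, C));
  return 0;
}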
+// +//===----------------------------------------------------------------------===// + +#include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +#include <utility> +using namespace llvm; + +#define DEBUG_TYPE "isel" + +/// LimitFloatPrecision - Generate low-precision inline sequences for +/// some float libcalls (6, 8 or 12 bits). +static unsigned LimitFloatPrecision; + +static cl::opt<unsigned, true> +LimitFPPrecision("limit-float-precision", + cl::desc("Generate low-precision inline sequences " + "for some float libcalls"), + cl::location(LimitFloatPrecision), + cl::init(0)); + +static cl::opt<bool> +EnableFMFInDAG("enable-fmf-dag", cl::init(true), cl::Hidden, + cl::desc("Enable fast-math-flags for DAG nodes")); + +// Limit the width of DAG chains. This is important in general to prevent +// DAG-based analysis from blowing up. For example, alias analysis and +// load clustering may not complete in reasonable time. It is difficult to +// recognize and avoid this situation within each individual analysis, and +// future analyses are likely to have the same behavior. Limiting DAG width is +// the safe approach and will be especially important with global DAGs. +// +// MaxParallelChains default is arbitrarily high to avoid affecting +// optimization, but could be lowered to improve compile time. Any ld-ld-st-st +// sequence over this should have been converted to llvm.memcpy by the +// frontend. 
It easy to induce this behavior with .ll code such as: +// %buffer = alloca [4096 x i8] +// %data = load [4096 x i8]* %argPtr +// store [4096 x i8] %data, [4096 x i8]* %buffer +static const unsigned MaxParallelChains = 64; + +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V); + +/// getCopyFromParts - Create a value that contains the specified legal parts +/// combined into the value they represent. If the parts combine to a type +/// larger then ValueVT then AssertOp can be used to specify whether the extra +/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT +/// (ISD::AssertSext). +static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, + const SDValue *Parts, + unsigned NumParts, MVT PartVT, EVT ValueVT, + const Value *V, + ISD::NodeType AssertOp = ISD::DELETED_NODE) { + if (ValueVT.isVector()) + return getCopyFromPartsVector(DAG, DL, Parts, NumParts, + PartVT, ValueVT, V); + + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + if (NumParts > 1) { + // Assemble the value from multiple parts. + if (ValueVT.isInteger()) { + unsigned PartBits = PartVT.getSizeInBits(); + unsigned ValueBits = ValueVT.getSizeInBits(); + + // Assemble the power of 2 part. + unsigned RoundParts = NumParts & (NumParts - 1) ? + 1 << Log2_32(NumParts) : NumParts; + unsigned RoundBits = PartBits * RoundParts; + EVT RoundVT = RoundBits == ValueBits ? + ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); + SDValue Lo, Hi; + + EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); + + if (RoundParts > 2) { + Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, + PartVT, HalfVT, V); + Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, + RoundParts / 2, PartVT, HalfVT, V); + } else { + Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); + } + + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + + Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); + + if (RoundParts < NumParts) { + // Assemble the trailing non-power-of-2 part. + unsigned OddParts = NumParts - RoundParts; + EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); + Hi = getCopyFromParts(DAG, DL, + Parts + RoundParts, OddParts, PartVT, OddVT, V); + + // Combine the round and odd parts. 
+ Lo = Val; + if (DAG.getDataLayout().isBigEndian()) + std::swap(Lo, Hi); + EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); + Hi = + DAG.getNode(ISD::SHL, DL, TotalVT, Hi, + DAG.getConstant(Lo.getValueType().getSizeInBits(), DL, + TLI.getPointerTy(DAG.getDataLayout()))); + Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); + Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); + } + } else if (PartVT.isFloatingPoint()) { + // FP split into multiple FP parts (for ppcf128) + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && + "Unexpected split"); + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); + Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); + if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout())) + std::swap(Lo, Hi); + Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); + } else { + // FP split into integer parts (soft fp) + assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && + !PartVT.isVector() && "Unexpected split"); + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); + } + } + + // There is now one part, held in Val. Correct it to match ValueVT. + EVT PartEVT = Val.getValueType(); + + if (PartEVT == ValueVT) + return Val; + + if (PartEVT.isInteger() && ValueVT.isFloatingPoint() && + ValueVT.bitsLT(PartEVT)) { + // For an FP value in an integer part, we need to truncate to the right + // width first. + PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val); + } + + if (PartEVT.isInteger() && ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartEVT)) { + // For a truncate, see if we have any information to + // indicate whether the truncated bits will always be + // zero or sign-extension. + if (AssertOp != ISD::DELETED_NODE) + Val = DAG.getNode(AssertOp, DL, PartEVT, Val, + DAG.getValueType(ValueVT)); + return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); + } + + if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + // FP_ROUND's are always exact here. + if (ValueVT.bitsLT(Val.getValueType())) + return DAG.getNode( + ISD::FP_ROUND, DL, ValueVT, Val, + DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout()))); + + return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); + } + + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + llvm_unreachable("Unknown mismatch!"); +} + +static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, + const Twine &ErrMsg) { + const Instruction *I = dyn_cast_or_null<Instruction>(V); + if (!V) + return Ctx.emitError(ErrMsg); + + const char *AsmError = ", possible invalid constraint for vector type"; + if (const CallInst *CI = dyn_cast<CallInst>(I)) + if (isa<InlineAsm>(CI->getCalledValue())) + return Ctx.emitError(I, ErrMsg + AsmError); + + return Ctx.emitError(I, ErrMsg); +} + +/// getCopyFromPartsVector - Create a value that contains the specified legal +/// parts combined into the value they represent. If the parts combine to a +/// type larger then ValueVT then AssertOp can be used to specify whether the +/// extra bits are known to be zero (ISD::AssertZext) or sign extended from +/// ValueVT (ISD::AssertSext). 
+static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, + const SDValue *Parts, unsigned NumParts, + MVT PartVT, EVT ValueVT, const Value *V) { + assert(ValueVT.isVector() && "Not a vector value"); + assert(NumParts > 0 && "No parts to assemble!"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Val = Parts[0]; + + // Handle a multi-element vector. + if (NumParts > 1) { + EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = + TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, + NumIntermediates, RegisterVT); + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + assert(RegisterVT.getSizeInBits() == + Parts[0].getSimpleValueType().getSizeInBits() && + "Part type sizes don't match!"); + + // Assemble the parts into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + if (NumIntermediates == NumParts) { + // If the register was not expanded, truncate or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, + PartVT, IntermediateVT, V); + } else if (NumParts > 0) { + // If the intermediate type was expanded, build the intermediate + // operands from the parts. + assert(NumParts % NumIntermediates == 0 && + "Must expand into a divisible number of parts!"); + unsigned Factor = NumParts / NumIntermediates; + for (unsigned i = 0; i != NumIntermediates; ++i) + Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, + PartVT, IntermediateVT, V); + } + + // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the + // intermediate operands. + Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS + : ISD::BUILD_VECTOR, + DL, ValueVT, Ops); + } + + // There is now one part, held in Val. Correct it to match ValueVT. + EVT PartEVT = Val.getValueType(); + + if (PartEVT == ValueVT) + return Val; + + if (PartEVT.isVector()) { + // If the element type of the source/dest vectors are the same, but the + // parts vector has more elements than the value vector, then we have a + // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the + // elements we want. + if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + "Cannot narrow, it would be a lossy transformation"); + return DAG.getNode( + ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + // Vector/Vector bitcast. + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + "Cannot handle this kind of promotion"); + // Promoted vector extract + return DAG.getAnyExtOrTrunc(Val, DL, ValueVT); + + } + + // Trivial bitcast if the types are the same size and the destination + // vector type is legal. 
+ if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && + TLI.isTypeLegal(ValueVT)) + return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + + // Handle cases such as i8 -> <1 x i1> + if (ValueVT.getVectorNumElements() != 1) { + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "non-trivial scalar-to-vector conversion"); + return DAG.getUNDEF(ValueVT); + } + + if (ValueVT.getVectorNumElements() == 1 && + ValueVT.getVectorElementType() != PartEVT) + Val = DAG.getAnyExtOrTrunc(Val, DL, ValueVT.getScalarType()); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); +} + +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V); + +/// getCopyToParts - Create a series of nodes that contain the specified value +/// split into legal parts. If the parts contain more bits than Val, then, for +/// integers, ExtendKind can be used to specify how to generate the extra bits. +static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V, + ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { + EVT ValueVT = Val.getValueType(); + + // Handle the vector case separately. + if (ValueVT.isVector()) + return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); + + unsigned PartBits = PartVT.getSizeInBits(); + unsigned OrigNumParts = NumParts; + assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && + "Copying to an illegal type!"); + + if (NumParts == 0) + return; + + assert(!ValueVT.isVector() && "Vector case handled elsewhere"); + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { + assert(NumParts == 1 && "No-op copy with multiple parts!"); + Parts[0] = Val; + return; + } + + if (NumParts * PartBits > ValueVT.getSizeInBits()) { + // If the parts cover more bits than the value has, promote the value. + if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + assert(NumParts == 1 && "Do not know what to promote to!"); + Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); + } else { + if (ValueVT.isFloatingPoint()) { + // FP values need to be bitcast, then extended if they are being put + // into a larger container. + ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); + Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); + } + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } + } else if (PartBits == ValueVT.getSizeInBits()) { + // Different types of the same size. + assert(NumParts == 1 && PartEVT != ValueVT); + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { + // If the parts cover less bits than value has, truncate the value. + assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && + ValueVT.isInteger() && + "Unknown mismatch!"); + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + if (PartVT == MVT::x86mmx) + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } + + // The value may have changed - recompute ValueVT. 
+ ValueVT = Val.getValueType(); + assert(NumParts * PartBits == ValueVT.getSizeInBits() && + "Failed to tile the value with PartVT!"); + + if (NumParts == 1) { + if (PartEVT != ValueVT) + diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, + "scalar-to-vector conversion failed"); + + Parts[0] = Val; + return; + } + + // Expand the value into multiple parts. + if (NumParts & (NumParts - 1)) { + // The number of parts is not a power of 2. Split off and copy the tail. + assert(PartVT.isInteger() && ValueVT.isInteger() && + "Do not know what to expand to!"); + unsigned RoundParts = 1 << Log2_32(NumParts); + unsigned RoundBits = RoundParts * PartBits; + unsigned OddParts = NumParts - RoundParts; + SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, + DAG.getIntPtrConstant(RoundBits, DL)); + getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); + + if (DAG.getDataLayout().isBigEndian()) + // The odd parts were reversed by getCopyToParts - unreverse them. + std::reverse(Parts + RoundParts, Parts + NumParts); + + NumParts = RoundParts; + ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); + Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); + } + + // The number of parts is a power of 2. Repeatedly bisect the value using + // EXTRACT_ELEMENT. + Parts[0] = DAG.getNode(ISD::BITCAST, DL, + EVT::getIntegerVT(*DAG.getContext(), + ValueVT.getSizeInBits()), + Val); + + for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { + for (unsigned i = 0; i < NumParts; i += StepSize) { + unsigned ThisBits = StepSize * PartBits / 2; + EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); + SDValue &Part0 = Parts[i]; + SDValue &Part1 = Parts[i+StepSize/2]; + + Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(1, DL)); + Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, + ThisVT, Part0, DAG.getIntPtrConstant(0, DL)); + + if (ThisBits == PartBits && ThisVT != PartVT) { + Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); + Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); + } + } + } + + if (DAG.getDataLayout().isBigEndian()) + std::reverse(Parts, Parts + OrigNumParts); +} + + +/// getCopyToPartsVector - Create a series of nodes that contain the specified +/// value split into legal parts. +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, + SDValue Val, SDValue *Parts, unsigned NumParts, + MVT PartVT, const Value *V) { + EVT ValueVT = Val.getValueType(); + assert(ValueVT.isVector() && "Not a vector"); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (NumParts == 1) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { + // Nothing to do. + } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { + // Bitconvert vector->vector case. + Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); + } else if (PartVT.isVector() && + PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && + PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + EVT ElementVT = PartVT.getVectorElementType(); + // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in + // undef elements. 
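getCopyToParts and getCopyFromParts are inverses: one bisects a wide value into power-of-two pieces with EXTRACT_ELEMENT, the other reassembles them with shifts and ORs (BUILD_PAIR). A standalone sketch of the little-endian case on a 64-bit integer, with the big-endian reversal and the non-power-of-two tail omitted (helper names are made up for illustration):

#include <cassert>
#include <cstdint>
#include <vector>

// Split a 64-bit value into NumParts equal little-endian pieces by repeated
// bisection (NumParts must be a power of two).
static std::vector<uint64_t> splitToParts(uint64_t Val, unsigned NumParts) {
  std::vector<uint64_t> Parts{Val};
  unsigned Bits = 64;
  while (Parts.size() < NumParts) {
    Bits /= 2;
    uint64_t Mask = (1ull << Bits) - 1;
    std::vector<uint64_t> Next;
    for (uint64_t P : Parts) {
      Next.push_back(P & Mask);            // low half first (little-endian)
      Next.push_back((P >> Bits) & Mask);  // then high half
    }
    Parts = Next;
  }
  return Parts;
}

// Reassemble the value from little-endian parts with shift and OR.
static uint64_t assembleFromParts(const std::vector<uint64_t> &Parts,
                                  unsigned PartBits) {
  uint64_t Val = 0;
  for (unsigned i = 0; i < Parts.size(); ++i)
    Val |= Parts[i] << (i * PartBits);
  return Val;
}

int main() {
  uint64_t V = 0x1122334455667788ull;
  std::vector<uint64_t> P = splitToParts(V, 4);  // four 16-bit parts
  assert(P[0] == 0x7788 && P[3] == 0x1122);
  assert(assembleFromParts(P, 16) == V);
  return 0;
}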
+ SmallVector<SDValue, 16> Ops; + for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())))); + + for (unsigned i = ValueVT.getVectorNumElements(), + e = PartVT.getVectorNumElements(); i != e; ++i) + Ops.push_back(DAG.getUNDEF(ElementVT)); + + Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); + + // FIXME: Use CONCAT for 2x -> 4x. + + //SDValue UndefElts = DAG.getUNDEF(VectorTy); + //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); + } else if (PartVT.isVector() && + PartEVT.getVectorElementType().bitsGE( + ValueVT.getVectorElementType()) && + PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + + // Promoted vector extract + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } else{ + // Vector -> scalar conversion. + assert(ValueVT.getVectorNumElements() == 1 && + "Only trivial vector-to-scalar conversions should get here!"); + Val = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val, + DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + + Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT); + } + + Parts[0] = Val; + return; + } + + // Handle a multi-element vector. + EVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, + IntermediateVT, + NumIntermediates, RegisterVT); + unsigned NumElements = ValueVT.getVectorNumElements(); + + assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); + NumParts = NumRegs; // Silence a compiler warning. + assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); + + // Split the vector into intermediate operands. + SmallVector<SDValue, 8> Ops(NumIntermediates); + for (unsigned i = 0; i != NumIntermediates; ++i) { + if (IntermediateVT.isVector()) + Ops[i] = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, + DAG.getConstant(i * (NumElements / NumIntermediates), DL, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + else + Ops[i] = DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val, + DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + + // Split the intermediate operands into legal parts. + if (NumParts == NumIntermediates) { + // If the register was not expanded, promote or copy the value, + // as appropriate. + for (unsigned i = 0; i != NumParts; ++i) + getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); + } else if (NumParts > 0) { + // If the intermediate type was expanded, split each the value into + // legal parts. 
+    assert(NumIntermediates != 0 && "division by zero");
+    assert(NumParts % NumIntermediates == 0 &&
+           "Must expand into a divisible number of parts!");
+    unsigned Factor = NumParts / NumIntermediates;
+    for (unsigned i = 0; i != NumIntermediates; ++i)
+      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
+  }
+}
+
+RegsForValue::RegsForValue() {}
+
+RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
+                           EVT valuevt)
+    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
+
+RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
+                           const DataLayout &DL, unsigned Reg, Type *Ty) {
+  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
+
+  for (EVT ValueVT : ValueVTs) {
+    unsigned NumRegs = TLI.getNumRegisters(Context, ValueVT);
+    MVT RegisterVT = TLI.getRegisterType(Context, ValueVT);
+    for (unsigned i = 0; i != NumRegs; ++i)
+      Regs.push_back(Reg + i);
+    RegVTs.push_back(RegisterVT);
+    Reg += NumRegs;
+  }
+}
+
+/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
+/// this value and returns the result as a ValueVT value. This uses
+/// Chain/Flag as the input and updates them for the output Chain/Flag.
+/// If the Flag pointer is NULL, no flag is used.
+SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
+                                      FunctionLoweringInfo &FuncInfo,
+                                      SDLoc dl,
+                                      SDValue &Chain, SDValue *Flag,
+                                      const Value *V) const {
+  // A Value with type {} or [0 x %t] needs no registers.
+  if (ValueVTs.empty())
+    return SDValue();
+
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+  // Assemble the legal parts into the final values.
+  SmallVector<SDValue, 4> Values(ValueVTs.size());
+  SmallVector<SDValue, 8> Parts;
+  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
+    // Copy the legal parts from the registers.
+    EVT ValueVT = ValueVTs[Value];
+    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
+    MVT RegisterVT = RegVTs[Value];
+
+    Parts.resize(NumRegs);
+    for (unsigned i = 0; i != NumRegs; ++i) {
+      SDValue P;
+      if (!Flag) {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
+      } else {
+        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
+        *Flag = P.getValue(2);
+      }
+
+      Chain = P.getValue(1);
+      Parts[i] = P;
+
+      // If the source register was virtual and if we know something about it,
+      // add an assert node.
+      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+          !RegisterVT.isInteger() || RegisterVT.isVector())
+        continue;
+
+      const FunctionLoweringInfo::LiveOutInfo *LOI =
+        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
+      if (!LOI)
+        continue;
+
+      unsigned RegSize = RegisterVT.getSizeInBits();
+      unsigned NumSignBits = LOI->NumSignBits;
+      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
+
+      if (NumZeroBits == RegSize) {
+        // The current value is a zero.
+        // Explicitly express that as it would be easier for
+        // optimizations to kick in.
+        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
+        continue;
+      }
+
+      // FIXME: We capture more information than the dag can represent. For
+      // now, just use the tightest assertzext/assertsext possible.
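The cascade that follows picks the narrowest AssertSext/AssertZext width (1, 8, 16 or 32 bits) that the known sign or zero bits can justify. A standalone sketch of that selection, assuming only the three inputs shown (tightestAssertWidth is an illustrative name, not an LLVM function):

#include <cassert>

// Given the register width and how many sign/zero bits are known, pick the
// narrowest width whose extension explains the whole register, or 0 if none of
// the candidate widths (1, 8, 16, 32) applies. 'SignExt' reports which kind.
static unsigned tightestAssertWidth(unsigned RegSize, unsigned NumSignBits,
                                    unsigned NumZeroBits, bool &SignExt) {
  const unsigned Widths[] = {1, 8, 16, 32};
  for (unsigned W : Widths) {
    if (NumSignBits > RegSize - W)  { SignExt = true;  return W; }  // sext from W
    if (NumZeroBits >= RegSize - W) { SignExt = false; return W; }  // zext from W
  }
  return 0;
}

int main() {
  bool SExt;
  assert(tightestAssertWidth(32, 25, 0, SExt) == 8 && SExt);    // sign bits reach bit 7
  assert(tightestAssertWidth(32, 1, 16, SExt) == 16 && !SExt);  // top 16 bits zero
  assert(tightestAssertWidth(32, 1, 0, SExt) == 0);             // nothing known
  return 0;
}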
+ bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + else + continue; + + // Add an assertion node. + assert(FromVT != MVT::Other); + Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT, V); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, + SDValue &Chain, SDValue *Flag, const Value *V, + ISD::NodeType PreferredExtendType) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ISD::NodeType ExtendKind = PreferredExtendType; + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector<SDValue, 8> Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + MVT RegisterVT = RegVTs[Value]; + + if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) + ExtendKind = ISD::ZERO_EXTEND; + + getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT, V, ExtendKind); + Part += NumParts; + } + + // Copy the parts into the registers. + SmallVector<SDValue, 8> Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (!Flag) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. 
+void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, SDLoc dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + else if (!Regs.empty() && + TargetRegisterInfo::isVirtualRegister(Regs.front())) { + // Put the register class of the virtual registers in the flag word. That + // way, later passes can recompute register class constraints for inline + // assembly as well as normal instructions. + // Don't do this for tied operands that can use the regclass information + // from the def. + const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); + Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); + } + + SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32); + Ops.push_back(Res); + + unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + MVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + unsigned TheReg = Regs[Reg++]; + Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); + + if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { + // If we clobbered the stack pointer, MFI should know about it. + assert(DAG.getMachineFunction().getFrameInfo()-> + hasOpaqueSPAdjustment()); + } + } + } +} + +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li) { + AA = &aa; + GFI = gfi; + LibInfo = li; + DL = &DAG.getDataLayout(); + Context = DAG.getContext(); + LPadToCallSiteMap.clear(); +} + +/// clear - Clear out the current SelectionDAG and the associated +/// state and prepare this SelectionDAGBuilder object to be used +/// for a new block. This doesn't clear out information about +/// additional blocks that are needed to complete switch lowering +/// or PHI node updating; that information is cleared out as it is +/// consumed. +void SelectionDAGBuilder::clear() { + NodeMap.clear(); + UnusedArgNodeMap.clear(); + PendingLoads.clear(); + PendingExports.clear(); + CurInst = nullptr; + HasTailCall = false; + SDNodeOrder = LowestSDNodeOrder; + StatepointLowering.clear(); +} + +/// clearDanglingDebugInfo - Clear the dangling debug information +/// map. This function is separated from the clear so that debug +/// information that is dangling in a basic block can be properly +/// resolved in a different basic block. This allows the +/// SelectionDAG to resolve dangling debug information attached +/// to PHI nodes. +void SelectionDAGBuilder::clearDanglingDebugInfo() { + DanglingDebugInfoMap.clear(); +} + +/// getRoot - Return the current virtual root of the Selection DAG, +/// flushing any PendingLoad items. This must be done before emitting +/// a store or any other node that may need to be ordered after any +/// prior load instructions. +/// +SDValue SelectionDAGBuilder::getRoot() { + if (PendingLoads.empty()) + return DAG.getRoot(); + + if (PendingLoads.size() == 1) { + SDValue Root = PendingLoads[0]; + DAG.setRoot(Root); + PendingLoads.clear(); + return Root; + } + + // Otherwise, we have to make a token factor node. 
+ SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + PendingLoads); + PendingLoads.clear(); + DAG.setRoot(Root); + return Root; +} + +/// getControlRoot - Similar to getRoot, but instead of flushing all the +/// PendingLoad items, flush all the PendingExports items. It is necessary +/// to do this before emitting a terminator instruction. +/// +SDValue SelectionDAGBuilder::getControlRoot() { + SDValue Root = DAG.getRoot(); + + if (PendingExports.empty()) + return Root; + + // Turn all of the CopyToReg chains into one factored node. + if (Root.getOpcode() != ISD::EntryToken) { + unsigned i = 0, e = PendingExports.size(); + for (; i != e; ++i) { + assert(PendingExports[i].getNode()->getNumOperands() > 1); + if (PendingExports[i].getNode()->getOperand(0) == Root) + break; // Don't add the root if we already indirectly depend on it. + } + + if (i == e) + PendingExports.push_back(Root); + } + + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, + PendingExports); + PendingExports.clear(); + DAG.setRoot(Root); + return Root; +} + +void SelectionDAGBuilder::visit(const Instruction &I) { + // Set up outgoing PHI node register values before emitting the terminator. + if (isa<TerminatorInst>(&I)) + HandlePHINodesInSuccessorBlocks(I.getParent()); + + ++SDNodeOrder; + + CurInst = &I; + + visit(I.getOpcode(), I); + + if (!isa<TerminatorInst>(&I) && !HasTailCall && + !isStatepoint(&I)) // statepoints handle their exports internally + CopyToExportRegsIfNeeded(&I); + + CurInst = nullptr; +} + +void SelectionDAGBuilder::visitPHI(const PHINode &) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); +} + +void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { + // Note: this doesn't use InstVisitor, because it has to work with + // ConstantExpr's in addition to instructions. + switch (Opcode) { + default: llvm_unreachable("Unknown instruction type encountered!"); + // Build the switch statement using the Instruction.def file. +#define HANDLE_INST(NUM, OPCODE, CLASS) \ + case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; +#include "llvm/IR/Instruction.def" + } +} + +// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, +// generate the debug data structures now that we've seen its definition. +void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, + SDValue Val) { + DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; + if (DDI.getDI()) { + const DbgValueInst *DI = DDI.getDI(); + DebugLoc dl = DDI.getdl(); + unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); + DILocalVariable *Variable = DI->getVariable(); + DIExpression *Expr = DI->getExpression(); + assert(Variable->isValidLocationForIntrinsic(dl) && + "Expected inlined-at fields to agree"); + uint64_t Offset = DI->getOffset(); + SDDbgValue *SDV; + if (Val.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false, + Val)) { + SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), + false, Offset, dl, DbgSDNodeOrder); + DAG.AddDbgValue(SDV, Val.getNode(), false); + } + } else + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); + DanglingDebugInfoMap[V] = DanglingDebugInfo(); + } +} + +/// getCopyFromRegs - If there was virtual register allocated for the value V +/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. 
+SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { + DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); + SDValue Result; + + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), + DAG.getDataLayout(), InReg, Ty); + SDValue Chain = DAG.getEntryNode(); + Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + resolveDanglingDebugInfo(V, Result); + } + + return Result; +} + +/// getValue - Return an SDValue for the given Value. +SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. + SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // If there's a virtual register allocated and initialized for this + // value, use it. + SDValue copyFromReg = getCopyFromRegs(V, V->getType()); + if (copyFromReg.getNode()) { + return copyFromReg; + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +// Return true if SDValue exists for the given Value +bool SelectionDAGBuilder::findValue(const Value *V) const { + return (NodeMap.find(V) != NodeMap.end()) || + (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. + SDValue &N = NodeMap[V]; + if (N.getNode()) { + if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) { + // Remove the debug location from the node as the node is about to be used + // in a location which may differ from the original debug location. This + // is relevant to Constant and ConstantFP nodes because they can appear + // as constant expressions inside PHI nodes. + N->setDebugLoc(DebugLoc()); + } + return N; + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + resolveDanglingDebugInfo(V, Val); + return Val; +} + +/// getValueImpl - Helper function for getValue and getNonRegisterValue. +/// Create an SDValue for the given value. 
+SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + if (const Constant *C = dyn_cast<Constant>(V)) { + EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true); + + if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) + return DAG.getConstant(*CI, getCurSDLoc(), VT); + + if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) + return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); + + if (isa<ConstantPointerNull>(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout(), AS)); + } + + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) + return DAG.getConstantFP(*CFP, getCurSDLoc(), VT); + + if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) + return DAG.getUNDEF(VT); + + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { + visit(CE->getOpcode(), *CE); + SDValue N1 = NodeMap[V]; + assert(N1.getNode() && "visit didn't populate the NodeMap!"); + return N1; + } + + if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { + SmallVector<SDValue, 4> Constants; + for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); + OI != OE; ++OI) { + SDNode *Val = getValue(*OI).getNode(); + // If the operand is an empty aggregate, there are no values. + if (!Val) continue; + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Constants.push_back(SDValue(Val, i)); + } + + return DAG.getMergeValues(Constants, getCurSDLoc()); + } + + if (const ConstantDataSequential *CDS = + dyn_cast<ConstantDataSequential>(C)) { + SmallVector<SDValue, 4> Ops; + for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { + SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); + // Add each leaf value from the operand to the Constants list + // to form a flattened list of all the values. + for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) + Ops.push_back(SDValue(Val, i)); + } + + if (isa<ArrayType>(CDS->getType())) + return DAG.getMergeValues(Ops, getCurSDLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), + VT, Ops); + } + + if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { + assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && + "Unknown struct or array constant!"); + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs); + unsigned NumElts = ValueVTs.size(); + if (NumElts == 0) + return SDValue(); // empty struct + SmallVector<SDValue, 4> Constants(NumElts); + for (unsigned i = 0; i != NumElts; ++i) { + EVT EltVT = ValueVTs[i]; + if (isa<UndefValue>(C)) + Constants[i] = DAG.getUNDEF(EltVT); + else if (EltVT.isFloatingPoint()) + Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT); + else + Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT); + } + + return DAG.getMergeValues(Constants, getCurSDLoc()); + } + + if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) + return DAG.getBlockAddress(BA, VT); + + VectorType *VecTy = cast<VectorType>(V->getType()); + unsigned NumElements = VecTy->getNumElements(); + + // Now that we know the number and type of the elements, get that number of + // elements into the Ops array based on what kind of constant it is. 
+ SmallVector<SDValue, 16> Ops; + if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { + for (unsigned i = 0; i != NumElements; ++i) + Ops.push_back(getValue(CV->getOperand(i))); + } else { + assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); + EVT EltVT = + TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType()); + + SDValue Op; + if (EltVT.isFloatingPoint()) + Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT); + else + Op = DAG.getConstant(0, getCurSDLoc(), EltVT); + Ops.assign(NumElements, Op); + } + + // Create a BUILD_VECTOR node. + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); + } + + // If this is a static alloca, generate it as the frameindex instead of + // computation. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + return DAG.getFrameIndex(SI->second, + TLI.getPointerTy(DAG.getDataLayout())); + } + + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast<Instruction>(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg, + Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); + } + + llvm_unreachable("Can't get register for value!"); +} + +void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) { + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Pers == EHPersonality::CoreCLR; + MachineBasicBlock *CatchPadMBB = FuncInfo.MBB; + // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + CatchPadMBB->setIsEHFuncletEntry(); + + DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot())); +} + +void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) { + // Update machine-CFG edge. + MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()]; + FuncInfo.MBB->addSuccessor(TargetMBB); + + auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsSEH = isAsynchronousEHPersonality(Pers); + if (IsSEH) { + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (TargetMBB != NextBlock(FuncInfo.MBB) || + TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB))); + return; + } + + // Figure out the funclet membership for the catchret's successor. + // This will be used by the FuncletLayout pass to determine how to order the + // BB's. + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I]; + assert(SuccessorColor && "No parent funclet for catchret!"); + MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor]; + assert(SuccessorColorMBB && "No MBB for SuccessorColor!"); + + // Create the terminator node. + SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(TargetMBB), + DAG.getBasicBlock(SuccessorColorMBB)); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) { + // Don't emit any special code for the cleanuppad instruction. 
It just marks + // the start of a funclet. + FuncInfo.MBB->setIsEHFuncletEntry(); + FuncInfo.MBB->setIsCleanupFuncletEntry(); +} + +/// When an invoke or a cleanupret unwinds to the next EH pad, there are +/// many places it could ultimately go. In the IR, we have a single unwind +/// destination, but in the machine CFG, we enumerate all the possible blocks. +/// This function skips over imaginary basic blocks that hold catchswitch +/// instructions, and finds all the "real" machine +/// basic block destinations. As those destinations may not be successors of +/// EHPadBB, here we also calculate the edge probability to those destinations. +/// The passed-in Prob is the edge probability to EHPadBB. +static void findUnwindDestinations( + FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB, + BranchProbability Prob, + SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>> + &UnwindDests) { + EHPersonality Personality = + classifyEHPersonality(FuncInfo.Fn->getPersonalityFn()); + bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX; + bool IsCoreCLR = Personality == EHPersonality::CoreCLR; + + while (EHPadBB) { + const Instruction *Pad = EHPadBB->getFirstNonPHI(); + BasicBlock *NewEHPadBB = nullptr; + if (isa<LandingPadInst>(Pad)) { + // Stop on landingpads. They are not funclets. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + break; + } else if (isa<CleanupPadInst>(Pad)) { + // Stop on cleanup pads. Cleanups are always funclet entries for all known + // personalities. + UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob); + UnwindDests.back().first->setIsEHFuncletEntry(); + break; + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) { + // Add the catchpad handlers to the possible destinations. + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob); + // For MSVC++ and the CLR, catchblocks are funclets and need prologues. + if (IsMSVCCXX || IsCoreCLR) + UnwindDests.back().first->setIsEHFuncletEntry(); + } + NewEHPadBB = CatchSwitch->getUnwindDest(); + } else { + continue; + } + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + if (BPI && NewEHPadBB) + Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB); + EHPadBB = NewEHPadBB; + } +} + +void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) { + // Update successor info. + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + auto UnwindDest = I.getUnwindDest(); + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability UnwindDestProb = + (BPI && UnwindDest) + ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second); + } + FuncInfo.MBB->normalizeSuccProbs(); + + // Create the terminator node. 
+ SDValue Ret = + DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot()); + DAG.setRoot(Ret); +} + +void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) { + report_fatal_error("visitCatchSwitch not yet implemented!"); +} + +void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); + SDValue Chain = getControlRoot(); + SmallVector<ISD::OutputArg, 8> Outs; + SmallVector<SDValue, 8> OutVals; + + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; + const Function *F = I.getParent()->getParent(); + + // Emit a store of the return value through the virtual register. + // Leave Outs empty so that LowerReturn won't try to load return + // registers the usual way. + SmallVector<EVT, 1> PtrValueVTs; + ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()), + PtrValueVTs); + + SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + DemoteReg, PtrValueVTs[0]); + SDValue RetOp = getValue(I.getOperand(0)); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + SmallVector<SDValue, 4> Chains(NumValues); + for (unsigned i = 0; i != NumValues; ++i) { + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), + RetPtr.getValueType(), RetPtr, + DAG.getIntPtrConstant(Offsets[i], + getCurSDLoc()), + &Flags); + Chains[i] = + DAG.getStore(Chain, getCurSDLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + i), + // FIXME: better loc info would be nice. 
+ Add, MachinePointerInfo(), false, false, 0); + } + + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), + MVT::Other, Chains); + } else if (I.getNumOperands() != 0) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues) { + SDValue RetOp = getValue(I.getOperand(0)); + + const Function *F = I.getParent()->getParent(); + + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + LLVMContext &Context = F->getContext(); + bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg); + + for (unsigned j = 0; j != NumValues; ++j) { + EVT VT = ValueVTs[j]; + + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); + + unsigned NumParts = TLI.getNumRegisters(Context, VT); + MVT PartVT = TLI.getRegisterType(Context, VT); + SmallVector<SDValue, 4> Parts(NumParts); + getCopyToParts(DAG, getCurSDLoc(), + SDValue(RetOp.getNode(), RetOp.getResNo() + j), + &Parts[0], NumParts, PartVT, &I, ExtendKind); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (RetInReg) + Flags.setInReg(); + + // Propagate extension type if any + if (ExtendKind == ISD::SIGN_EXTEND) + Flags.setSExt(); + else if (ExtendKind == ISD::ZERO_EXTEND) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + VT, /*isfixed=*/true, 0, 0)); + OutVals.push_back(Parts[i]); + } + } + } + } + + bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); + CallingConv::ID CallConv = + DAG.getMachineFunction().getFunction()->getCallingConv(); + Chain = DAG.getTargetLoweringInfo().LowerReturn( + Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); + + // Verify that the target's LowerReturn behaved as expected. + assert(Chain.getNode() && Chain.getValueType() == MVT::Other && + "LowerReturn didn't return a valid chain!"); + + // Update the DAG with the new chain value resulting from return lowering. + DAG.setRoot(Chain); +} + +/// CopyToExportRegsIfNeeded - If the given value has virtual registers +/// created for it, emit nodes to copy the value into the virtual +/// registers. +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { + // Skip empty types + if (V->getType()->isEmptyTy()) + return; + + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) { + assert(!V->use_empty() && "Unused value assigned virtual registers!"); + CopyValueToVirtualRegister(V, VMI->second); + } +} + +/// ExportFromCurrentBlock - If this condition isn't known to be exported from +/// the current basic block, add it to ValueMap now so that we'll get a +/// CopyTo/FromReg. +void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { + // No need to export constants. + if (!isa<Instruction>(V) && !isa<Argument>(V)) return; + + // Already exported? 
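+  // (A value counts as exported once it has been assigned a virtual register,
+  // so emitting another copy below would be redundant.)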
+ if (FuncInfo.isExportedInst(V)) return; + + unsigned Reg = FuncInfo.InitializeRegForValue(V); + CopyValueToVirtualRegister(V, Reg); +} + +bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, + const BasicBlock *FromBB) { + // The operands of the setcc have to be in this block. We don't know + // how to export them from some other block. + if (const Instruction *VI = dyn_cast<Instruction>(V)) { + // Can export from current BB. + if (VI->getParent() == FromBB) + return true; + + // Is already exported, noop. + return FuncInfo.isExportedInst(V); + } + + // If this is an argument, we can export it if the BB is the entry block or + // if it is already exported. + if (isa<Argument>(V)) { + if (FromBB == &FromBB->getParent()->getEntryBlock()) + return true; + + // Otherwise, can only export this if it is already exported. + return FuncInfo.isExportedInst(V); + } + + // Otherwise, constants can always be exported. + return true; +} + +/// Return branch probability calculated by BranchProbabilityInfo for IR blocks. +BranchProbability +SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const { + BranchProbabilityInfo *BPI = FuncInfo.BPI; + const BasicBlock *SrcBB = Src->getBasicBlock(); + const BasicBlock *DstBB = Dst->getBasicBlock(); + if (!BPI) { + // If BPI is not available, set the default probability as 1 / N, where N is + // the number of successors. + auto SuccSize = std::max<uint32_t>( + std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1); + return BranchProbability(1, SuccSize); + } + return BPI->getEdgeProbability(SrcBB, DstBB); +} + +void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src, + MachineBasicBlock *Dst, + BranchProbability Prob) { + if (!FuncInfo.BPI) + Src->addSuccessorWithoutProb(Dst); + else { + if (Prob.isUnknown()) + Prob = getEdgeProbability(Src, Dst); + Src->addSuccessor(Dst, Prob); + } +} + +static bool InBlock(const Value *V, const BasicBlock *BB) { + if (const Instruction *I = dyn_cast<Instruction>(V)) + return I->getParent() == BB; + return true; +} + +/// EmitBranchForMergedCondition - Helper method for FindMergedConditions. +/// This function emits a branch and is used at the leaves of an OR or an +/// AND operator tree. +/// +void +SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + BranchProbability TProb, + BranchProbability FProb) { + const BasicBlock *BB = CurBB->getBasicBlock(); + + // If the leaf of the tree is a comparison, merge the condition into + // the caseblock. + if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { + // The operands of the cmp have to be in this block. We don't know + // how to export them from some other block. If this is the first block + // of the sequence, no exporting is needed. 
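+    // When CurBB == SwitchBB we are still emitting into the block that owns
+    // the condition, so its operands are directly available; for later blocks
+    // in the sequence both operands must be exportable, and visitBr exports
+    // them once the CaseBlocks are accepted.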
+ if (CurBB == SwitchBB || + (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && + isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { + ISD::CondCode Condition; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { + Condition = getICmpCondCode(IC->getPredicate()); + } else { + const FCmpInst *FC = cast<FCmpInst>(Cond); + Condition = getFCmpCondCode(FC->getPredicate()); + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + } + + CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, + TBB, FBB, CurBB, TProb, FProb); + SwitchCases.push_back(CB); + return; + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), + nullptr, TBB, FBB, CurBB, TProb, FProb); + SwitchCases.push_back(CB); +} + +/// FindMergedConditions - If Cond is an expression like +void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, + BranchProbability TProb, + BranchProbability FProb) { + // If this node is not part of the or/and tree, emit it as a branch. + const Instruction *BOp = dyn_cast<Instruction>(Cond); + if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || + (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || + BOp->getParent() != CurBB->getBasicBlock() || + !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || + !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { + EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, + TProb, FProb); + return; + } + + // Create TmpBB after CurBB. + MachineFunction::iterator BBI(CurBB); + MachineFunction &MF = DAG.getMachineFunction(); + MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); + CurBB->getParent()->insert(++BBI, TmpBB); + + if (Opc == Instruction::Or) { + // Codegen X | Y as: + // BB1: + // jmp_if_X TBB + // jmp TmpBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. + // The requirement is that + // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) + // = TrueProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to + // A/(1+B) and 2B/(1+B). This choice assumes that + // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. + // Another choice is to assume TrueProb for BB1 equals to TrueProb for + // TmpBB, but the math is more complicated. + + auto NewTrueProb = TProb / 2; + auto NewFalseProb = TProb / 2 + FProb; + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, + NewTrueProb, NewFalseProb); + + // Normalize A/2 and B to get A/(1+B) and 2B/(1+B). + SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + Probs[0], Probs[1]); + } else { + assert(Opc == Instruction::And && "Unknown merge op!"); + // Codegen X & Y as: + // BB1: + // jmp_if_X TmpBB + // jmp FBB + // TmpBB: + // jmp_if_Y TBB + // jmp FBB + // + // This requires creation of TmpBB after CurBB. + + // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 
+ // The requirement is that + // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) + // = FalseProb for original BB. + // Assuming the original probabilities are A and B, one choice is to set + // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to + // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 == + // TrueProb for BB1 * FalseProb for TmpBB. + + auto NewTrueProb = TProb + FProb / 2; + auto NewFalseProb = FProb / 2; + // Emit the LHS condition. + FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, + NewTrueProb, NewFalseProb); + + // Normalize A and B/2 to get 2A/(1+A) and B/(1+A). + SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2}; + BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end()); + // Emit the RHS condition into TmpBB. + FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, + Probs[0], Probs[1]); + } +} + +/// If the set of cases should be emitted as a series of branches, return true. +/// If we should emit this as a bunch of and/or'd together conditions, return +/// false. +bool +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { + if (Cases.size() != 2) return true; + + // If this is two comparisons of the same values or'd or and'd together, they + // will get folded into a single comparison, so don't emit two blocks. + if ((Cases[0].CmpLHS == Cases[1].CmpLHS && + Cases[0].CmpRHS == Cases[1].CmpRHS) || + (Cases[0].CmpRHS == Cases[1].CmpLHS && + Cases[0].CmpLHS == Cases[1].CmpRHS)) { + return false; + } + + // Handle: (X != null) | (Y != null) --> (X|Y) != 0 + // Handle: (X == null) & (Y == null) --> (X|Y) == 0 + if (Cases[0].CmpRHS == Cases[1].CmpRHS && + Cases[0].CC == Cases[1].CC && + isa<Constant>(Cases[0].CmpRHS) && + cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { + if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) + return false; + if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) + return false; + } + + return true; +} + +void SelectionDAGBuilder::visitBr(const BranchInst &I) { + MachineBasicBlock *BrMBB = FuncInfo.MBB; + + // Update machine-CFG edges. + MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; + + if (I.isUnconditional()) { + // Update machine-CFG edges. + BrMBB->addSuccessor(Succ0MBB); + + // If this is not a fall-through branch or optimizations are switched off, + // emit the branch. + if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Succ0MBB))); + + return; + } + + // If this condition is one of the special cases we handle, do special stuff + // now. + const Value *CondVal = I.getCondition(); + MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; + + // If this is a series of conditions that are or'd or and'd together, emit + // this as a sequence of branches instead of setcc's with and/or operations. + // As long as jumps are not expensive, this should improve performance. 
+ // For example, instead of something like: + // cmp A, B + // C = seteq + // cmp D, E + // F = setle + // or C, F + // jnz foo + // Emit: + // cmp A, B + // je foo + // cmp D, E + // jle foo + // + if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { + Instruction::BinaryOps Opcode = BOp->getOpcode(); + if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() && + !I.getMetadata(LLVMContext::MD_unpredictable) && + (Opcode == Instruction::And || Opcode == Instruction::Or)) { + FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, + Opcode, + getEdgeProbability(BrMBB, Succ0MBB), + getEdgeProbability(BrMBB, Succ1MBB)); + // If the compares in later blocks need to use values not currently + // exported from this block, export them now. This block should always + // be the first entry. + assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); + + // Allow some cases to be rejected. + if (ShouldEmitAsBranches(SwitchCases)) { + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { + ExportFromCurrentBlock(SwitchCases[i].CmpLHS); + ExportFromCurrentBlock(SwitchCases[i].CmpRHS); + } + + // Emit the branch for this block. + visitSwitchCase(SwitchCases[0], BrMBB); + SwitchCases.erase(SwitchCases.begin()); + return; + } + + // Okay, we decided not to do this, remove any inserted MBB's and clear + // SwitchCases. + for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) + FuncInfo.MF->erase(SwitchCases[i].ThisBB); + + SwitchCases.clear(); + } + } + + // Create a CaseBlock record representing this branch. + CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), + nullptr, Succ0MBB, Succ1MBB, BrMBB); + + // Use visitSwitchCase to actually insert the fast branch sequence for this + // cond branch. + visitSwitchCase(CB, BrMBB); +} + +/// visitSwitchCase - Emits the necessary code to represent a single node in +/// the binary search tree resulting from lowering a switch instruction. +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB) { + SDValue Cond; + SDValue CondLHS = getValue(CB.CmpLHS); + SDLoc dl = getCurSDLoc(); + + // Build the setcc now. + if (!CB.CmpMHS) { + // Fold "(X == true)" to X and "(X == false)" to !X to + // handle common cases produced by branch lowering. + if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && + CB.CC == ISD::SETEQ) + Cond = CondLHS; + else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && + CB.CC == ISD::SETEQ) { + SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); + } else + Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); + } else { + assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); + + const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); + const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); + + SDValue CmpOp = getValue(CB.CmpMHS); + EVT VT = CmpOp.getValueType(); + + if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { + Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT), + ISD::SETLE); + } else { + SDValue SUB = DAG.getNode(ISD::SUB, dl, + VT, CmpOp, DAG.getConstant(Low, dl, VT)); + Cond = DAG.getSetCC(dl, MVT::i1, SUB, + DAG.getConstant(High-Low, dl, VT), ISD::SETULE); + } + } + + // Update successor info + addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb); + // TrueBB and FalseBB are always different unless the incoming IR is + // degenerate. 
This only happens when running llc on weird IR. + if (CB.TrueBB != CB.FalseBB) + addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb); + SwitchBB->normalizeSuccProbs(); + + // If the lhs block is the next block, invert the condition so that we can + // fall through to the lhs instead of the rhs block. + if (CB.TrueBB == NextBlock(SwitchBB)) { + std::swap(CB.TrueBB, CB.FalseBB); + SDValue True = DAG.getConstant(1, dl, Cond.getValueType()); + Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); + } + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), Cond, + DAG.getBasicBlock(CB.TrueBB)); + + // Insert the false branch. Do this even if it's a fall through branch, + // this makes it easier to do DAG optimizations which require inverting + // the branch condition. + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); + + DAG.setRoot(BrCond); +} + +/// visitJumpTable - Emit JumpTable node in the current MBB +void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { + // Emit the code for the jump table + assert(JT.Reg != -1U && "Should lower JT Header first!"); + EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), + JT.Reg, PTy); + SDValue Table = DAG.getJumpTable(JT.JTI, PTy); + SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), + MVT::Other, Index.getValue(1), + Table, Index); + DAG.setRoot(BrJumpTable); +} + +/// visitJumpTableHeader - This function emits necessary code to produce index +/// in the JumpTable from switch case. +void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + + // Subtract the lowest switch case value from the value being switched on and + // conditional branch to default mbb if the result is greater than the + // difference between smallest and largest cases. + SDValue SwitchOp = getValue(JTH.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(JTH.First, dl, VT)); + + // The SDNode we just created, which holds the value being switched on minus + // the smallest case value, needs to be copied to a virtual register so it + // can be used as an index into the jump table in a subsequent basic block. + // This value may be smaller or larger than the target's pointer type, and + // therefore require extension or truncating. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout())); + + unsigned JumpTableReg = + FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout())); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, + JumpTableReg, SwitchOp); + JT.Reg = JumpTableReg; + + // Emit the range check for the jump table, and branch to the default block + // for the switch statement if the value being switched on exceeds the largest + // case in the switch. + SDValue CMP = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT); + + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, CopyTo, CMP, + DAG.getBasicBlock(JT.Default)); + + // Avoid emitting unnecessary branches to the next block. 
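+  // If the jump table's MBB is laid out immediately after this header block,
+  // the unconditional BR is omitted and the in-range path simply falls
+  // through.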
+ if (JT.MBB != NextBlock(SwitchBB)) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(JT.MBB)); + + DAG.setRoot(BrCond); +} + +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = DL->getPrefTypeAlignment(IRGuard->getType()); + + SDValue Guard; + SDLoc dl = getCurSDLoc(); + + // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the + // guard value from the virtual register holding the value. Otherwise, emit a + // volatile load to retrieve the stack guard value. + unsigned GuardReg = SPD.getGuardReg(); + + if (GuardReg && TLI.useLoadStackGuardNode()) + Guard = DAG.getCopyFromReg(DAG.getEntryNode(), dl, GuardReg, + PtrTy); + else + Guard = DAG.getLoad(PtrTy, dl, DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad( + PtrTy, dl, DAG.getEntryNode(), StackSlotPtr, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true, + false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(), + *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, dl, VT), ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, dl, + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. 
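+///
+/// Roughly, the code emitted here is just the no-result libcall
+///   call void @__stack_chk_fail()
+/// whose out-chain becomes the new DAG root.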
+void +SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Chain = + TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, + None, false, getCurSDLoc(), false, false).second; + DAG.setRoot(Chain); +} + +/// visitBitTestHeader - This function emits necessary code to produce value +/// suitable for "bit tests" +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, + MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + + // Subtract the minimum value + SDValue SwitchOp = getValue(B.SValue); + EVT VT = SwitchOp.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp, + DAG.getConstant(B.First, dl, VT)); + + // Check range + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue RangeCmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT); + + // Determine the type of the test operands. + bool UsePtrType = false; + if (!TLI.isTypeLegal(VT)) + UsePtrType = true; + else { + for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) + if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { + // Switch table case range are encoded into series of masks. + // Just use pointer type, it's guaranteed to fit. + UsePtrType = true; + break; + } + } + if (UsePtrType) { + VT = TLI.getPointerTy(DAG.getDataLayout()); + Sub = DAG.getZExtOrTrunc(Sub, dl, VT); + } + + B.RegVT = VT.getSimpleVT(); + B.Reg = FuncInfo.CreateReg(B.RegVT); + SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub); + + MachineBasicBlock* MBB = B.Cases[0].ThisBB; + + addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb); + addSuccessorWithProb(SwitchBB, MBB, B.Prob); + SwitchBB->normalizeSuccProbs(); + + SDValue BrRange = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, CopyTo, RangeCmp, + DAG.getBasicBlock(B.Default)); + + // Avoid emitting unnecessary branches to the next block. + if (MBB != NextBlock(SwitchBB)) + BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange, + DAG.getBasicBlock(MBB)); + + DAG.setRoot(BrRange); +} + +/// visitBitTestCase - this function produces one "bit test" +void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, + BranchProbability BranchProbToNext, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB) { + SDLoc dl = getCurSDLoc(); + MVT VT = BB.RegVT; + SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT); + SDValue Cmp; + unsigned PopCount = countPopulation(B.Mask); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (PopCount == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT), + ISD::SETEQ); + } else if (PopCount == BB.Range) { + // There is only one zero bit in the range, test for it directly. 
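+    // Every value in the range except the single position whose mask bit is
+    // clear belongs to this case, so a SETNE compare against that position
+    // (countTrailingOnes of the mask) is sufficient.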
+ Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT), + ISD::SETNE); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT, + DAG.getConstant(1, dl, VT), ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, dl, + VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT)); + Cmp = DAG.getSetCC( + dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), + AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE); + } + + // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb. + addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb); + // The branch probability from SwitchBB to NextMBB is BranchProbToNext. + addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext); + // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is + // one as they are relative probabilities (and thus work more like weights), + // and hence we need to normalize them to let the sum of them become one. + SwitchBB->normalizeSuccProbs(); + + SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl, + MVT::Other, getControlRoot(), + Cmp, DAG.getBasicBlock(B.TargetBB)); + + // Avoid emitting unnecessary branches to the next block. + if (NextMBB != NextBlock(SwitchBB)) + BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd, + DAG.getBasicBlock(NextMBB)); + + DAG.setRoot(BrAnd); +} + +void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; + + // Retrieve successors. Look through artificial IR level blocks like + // catchswitch for successors. + MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; + const BasicBlock *EHPadBB = I.getSuccessor(1); + + const Value *Callee(I.getCalledValue()); + const Function *Fn = dyn_cast<Function>(Callee); + if (isa<InlineAsm>(Callee)) + visitInlineAsm(&I); + else if (Fn && Fn->isIntrinsic()) { + switch (Fn->getIntrinsicID()) { + default: + llvm_unreachable("Cannot invoke this intrinsic"); + case Intrinsic::donothing: + // Ignore invokes to @llvm.donothing: jump directly to the next BB. + break; + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: + visitPatchpoint(&I, EHPadBB); + break; + case Intrinsic::experimental_gc_statepoint: + LowerStatepoint(ImmutableStatepoint(&I), EHPadBB); + break; + } + } else + LowerCallTo(&I, getValue(Callee), false, EHPadBB); + + // If the value of the invoke is used outside of its defining block, make it + // available as a virtual register. + // We already took care of the exported value for the statepoint instruction + // during call to the LowerStatepoint. + if (!isStatepoint(I)) { + CopyToExportRegsIfNeeded(&I); + } + + SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests; + BranchProbabilityInfo *BPI = FuncInfo.BPI; + BranchProbability EHPadBBProb = + BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB) + : BranchProbability::getZero(); + findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests); + + // Update successor info. + addSuccessorWithProb(InvokeMBB, Return); + for (auto &UnwindDest : UnwindDests) { + UnwindDest.first->setIsEHPad(); + addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second); + } + InvokeMBB->normalizeSuccProbs(); + + // Drop into normal successor. 
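+  // Only the normal edge needs an explicit branch in the DAG; the exceptional
+  // edges added above exist purely in the machine CFG.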
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, getControlRoot(), + DAG.getBasicBlock(Return))); +} + +void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { + llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); +} + +void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { + assert(FuncInfo.MBB->isEHPad() && + "Call to landingpad not in landing pad!"); + + MachineBasicBlock *MBB = FuncInfo.MBB; + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + AddLandingPadInfo(LP, MMI, MBB); + + // If there aren't registers to copy the values into (e.g., during SjLj + // exceptions), then don't bother to create these DAG nodes. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn(); + if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 && + TLI.getExceptionSelectorRegister(PersonalityFn) == 0) + return; + + // If landingpad's return type is token type, we don't create DAG nodes + // for its exception pointer and selector value. The extraction of exception + // pointer or selector value from token type landingpads is not currently + // supported. + if (LP.getType()->isTokenTy()) + return; + + SmallVector<EVT, 2> ValueVTs; + SDLoc dl = getCurSDLoc(); + ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs); + assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); + + // Get the two live-in registers as SDValues. The physregs have already been + // copied into virtual registers. + SDValue Ops[2]; + if (FuncInfo.ExceptionPointerVirtReg) { + Ops[0] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionPointerVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), + dl, ValueVTs[0]); + } else { + Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout())); + } + Ops[1] = DAG.getZExtOrTrunc( + DAG.getCopyFromReg(DAG.getEntryNode(), dl, + FuncInfo.ExceptionSelectorVirtReg, + TLI.getPointerTy(DAG.getDataLayout())), + dl, ValueVTs[1]); + + // Merge into one. + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(ValueVTs), Ops); + setValue(&LP, Res); +} + +void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) { +#ifndef NDEBUG + for (const CaseCluster &CC : Clusters) + assert(CC.Low == CC.High && "Input clusters must be single-case"); +#endif + + std::sort(Clusters.begin(), Clusters.end(), + [](const CaseCluster &a, const CaseCluster &b) { + return a.Low->getValue().slt(b.Low->getValue()); + }); + + // Merge adjacent clusters with the same destination. + const unsigned N = Clusters.size(); + unsigned DstIndex = 0; + for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) { + CaseCluster &CC = Clusters[SrcIndex]; + const ConstantInt *CaseVal = CC.Low; + MachineBasicBlock *Succ = CC.MBB; + + if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ && + (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) { + // If this case has the same successor and is a neighbour, merge it into + // the previous cluster. + Clusters[DstIndex - 1].High = CaseVal; + Clusters[DstIndex - 1].Prob += CC.Prob; + } else { + std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex], + sizeof(Clusters[SrcIndex])); + } + } + Clusters.resize(DstIndex); +} + +void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, + MachineBasicBlock *Last) { + // Update JTCases. 
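+  // (If the block recorded as a jump-table header was split, the pending
+  // lowering must continue from the new tail block rather than the original.)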
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i) + if (JTCases[i].first.HeaderBB == First) + JTCases[i].first.HeaderBB = Last; + + // Update BitTestCases. + for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) + if (BitTestCases[i].Parent == First) + BitTestCases[i].Parent = Last; +} + +void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; + + // Update machine-CFG edges with unique successors. + SmallSet<BasicBlock*, 32> Done; + for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { + BasicBlock *BB = I.getSuccessor(i); + bool Inserted = Done.insert(BB).second; + if (!Inserted) + continue; + + MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; + addSuccessorWithProb(IndirectBrMBB, Succ); + } + IndirectBrMBB->normalizeSuccProbs(); + + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), + MVT::Other, getControlRoot(), + getValue(I.getAddress()))); +} + +void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { + if (DAG.getTarget().Options.TrapUnreachable) + DAG.setRoot( + DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); +} + +void SelectionDAGBuilder::visitFSub(const User &I) { + // -0.0 - X --> fneg + Type *Ty = I.getType(); + if (isa<Constant>(I.getOperand(0)) && + I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), + Op2.getValueType(), Op2)); + return; + } + + visitBinary(I, ISD::FSUB); +} + +void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + bool nuw = false; + bool nsw = false; + bool exact = false; + FastMathFlags FMF; + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I)) + FMF = FPOp->getFastMathFlags(); + + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); + if (EnableFMFInDAG) { + Flags.setAllowReciprocal(FMF.allowReciprocal()); + Flags.setNoInfs(FMF.noInfs()); + Flags.setNoNaNs(FMF.noNaNs()); + Flags.setNoSignedZeros(FMF.noSignedZeros()); + Flags.setUnsafeAlgebra(FMF.unsafeAlgebra()); + } + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, &Flags); + setValue(&I, BinNodeValue); +} + +void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy( + Op2.getValueType(), DAG.getDataLayout()); + + // Coerce the shift amount to the right type if we can. + if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { + unsigned ShiftSize = ShiftTy.getSizeInBits(); + unsigned Op2Size = Op2.getValueType().getSizeInBits(); + SDLoc DL = getCurSDLoc(); + + // If the operand is smaller than the shift count type, promote it. + if (ShiftSize > Op2Size) + Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); + + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. 
This is a common case and it exposes the truncate to + // optimization early. + else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); + // Otherwise we'll need to temporarily settle for some other convenient + // type. Type legalization will make adjustments once the shiftee is split. + else + Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); + } + + bool nuw = false; + bool nsw = false; + bool exact = false; + + if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + } + SDNodeFlags Flags; + Flags.setExact(exact); + Flags.setNoSignedWrap(nsw); + Flags.setNoUnsignedWrap(nuw); + SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, + &Flags); + setValue(&I, Res); +} + +void SelectionDAGBuilder::visitSDiv(const User &I) { + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + + SDNodeFlags Flags; + Flags.setExact(isa<PossiblyExactOperator>(&I) && + cast<PossiblyExactOperator>(&I)->isExact()); + setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, + Op2, &Flags)); +} + +void SelectionDAGBuilder::visitICmp(const User &I) { + ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; + if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) + predicate = IC->getPredicate(); + else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) + predicate = ICmpInst::Predicate(IC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Opcode = getICmpCondCode(predicate); + + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); +} + +void SelectionDAGBuilder::visitFCmp(const User &I) { + FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; + if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) + predicate = FC->getPredicate(); + else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) + predicate = FCmpInst::Predicate(FC->getPredicate()); + SDValue Op1 = getValue(I.getOperand(0)); + SDValue Op2 = getValue(I.getOperand(1)); + ISD::CondCode Condition = getFCmpCondCode(predicate); + + // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them. + // FIXME: We should propagate the fast-math-flags to the DAG node itself for + // further optimization, but currently FMF is only applicable to binary nodes. + if (TM.Options.NoNaNsFPMath) + Condition = getFCmpCodeWithoutNaN(Condition); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); +} + +void SelectionDAGBuilder::visitSelect(const User &I) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(), + ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + SmallVector<SDValue, 4> Values(NumValues); + SDValue Cond = getValue(I.getOperand(0)); + SDValue LHSVal = getValue(I.getOperand(1)); + SDValue RHSVal = getValue(I.getOperand(2)); + auto BaseOps = {Cond}; + ISD::NodeType OpCode = Cond.getValueType().isVector() ? 
+ ISD::VSELECT : ISD::SELECT; + + // Min/max matching is only viable if all output VTs are the same. + if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) { + EVT VT = ValueVTs[0]; + LLVMContext &Ctx = *DAG.getContext(); + auto &TLI = DAG.getTargetLoweringInfo(); + + // We care about the legality of the operation after it has been type + // legalized. + while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal && + VT != TLI.getTypeToTransformTo(Ctx, VT)) + VT = TLI.getTypeToTransformTo(Ctx, VT); + + // If the vselect is legal, assume we want to leave this as a vector setcc + + // vselect. Otherwise, if this is going to be scalarized, we want to see if + // min/max is legal on the scalar type. + bool UseScalarMinMax = VT.isVector() && + !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT); + + Value *LHS, *RHS; + auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS); + ISD::NodeType Opc = ISD::DELETED_NODE; + switch (SPR.Flavor) { + case SPF_UMAX: Opc = ISD::UMAX; break; + case SPF_UMIN: Opc = ISD::UMIN; break; + case SPF_SMAX: Opc = ISD::SMAX; break; + case SPF_SMIN: Opc = ISD::SMIN; break; + case SPF_FMINNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break; + case SPNB_RETURNS_ANY: { + if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT)) + Opc = ISD::FMINNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT)) + Opc = ISD::FMINNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ? + ISD::FMINNUM : ISD::FMINNAN; + break; + } + } + break; + case SPF_FMAXNUM: + switch (SPR.NaNBehavior) { + case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?"); + case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break; + case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break; + case SPNB_RETURNS_ANY: + + if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT)) + Opc = ISD::FMAXNUM; + else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT)) + Opc = ISD::FMAXNAN; + else if (UseScalarMinMax) + Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ? + ISD::FMAXNUM : ISD::FMAXNAN; + break; + } + break; + default: break; + } + + if (Opc != ISD::DELETED_NODE && + (TLI.isOperationLegalOrCustom(Opc, VT) || + (UseScalarMinMax && + TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) && + // If the underlying comparison instruction is used by any other + // instruction, the consumed instructions won't be destroyed, so it is + // not profitable to convert to a min/max. + cast<SelectInst>(&I)->getCondition()->hasOneUse()) { + OpCode = Opc; + LHSVal = getValue(LHS); + RHSVal = getValue(RHS); + BaseOps = {}; + } + } + + for (unsigned i = 0; i != NumValues; ++i) { + SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end()); + Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i)); + Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i)); + Values[i] = DAG.getNode(OpCode, getCurSDLoc(), + LHSVal.getNode()->getValueType(LHSVal.getResNo()+i), + Ops); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValueVTs), Values)); +} + +void SelectionDAGBuilder::visitTrunc(const User &I) { + // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 
+ SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitZExt(const User &I) { + // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // ZExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSExt(const User &I) { + // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). + // SExt also can't be a cast to bool for same reason. So, nothing much to do + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPTrunc(const User &I) { + // FPTrunc is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N, + DAG.getTargetConstant( + 0, dl, TLI.getPointerTy(DAG.getDataLayout())))); +} + +void SelectionDAGBuilder::visitFPExt(const User &I) { + // FPExt is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToUI(const User &I) { + // FPToUI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitFPToSI(const User &I) { + // FPToSI is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitUIToFP(const User &I) { + // UIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitSIToFP(const User &I) { + // SIToFP is never a no-op cast, no need to check + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); +} + +void SelectionDAGBuilder::visitPtrToInt(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. 
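+  // getZExtOrTrunc below picks between those: ZERO_EXTEND when the
+  // destination is wider, TRUNCATE when it is narrower, and the value is
+  // returned unchanged when the widths already match.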
+ SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); +} + +void SelectionDAGBuilder::visitIntToPtr(const User &I) { + // What to do depends on the size of the integer and the size of the pointer. + // We can either truncate, zero extend, or no-op, accordingly. + SDValue N = getValue(I.getOperand(0)); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); +} + +void SelectionDAGBuilder::visitBitCast(const User &I) { + SDValue N = getValue(I.getOperand(0)); + SDLoc dl = getCurSDLoc(); + EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType()); + + // BitCast assures us that source and destination are the same size so this is + // either a BITCAST or a no-op. + if (DestVT != N.getValueType()) + setValue(&I, DAG.getNode(ISD::BITCAST, dl, + DestVT, N)); // convert types. + // Check if the original LLVM IR Operand was a ConstantInt, because getValue() + // might fold any kind of constant expression to an integer constant and that + // is not what we are looking for. Only regcognize a bitcast of a genuine + // constant integer as an opaque constant. + else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) + setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false, + /*isOpaque*/true)); + else + setValue(&I, N); // noop cast. +} + +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + +void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue InVec = getValue(I.getOperand(0)); + SDValue InVal = getValue(I.getOperand(1)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InVal, InIdx)); +} + +void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue InVec = getValue(I.getOperand(0)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(), + TLI.getVectorIdxTy(DAG.getDataLayout())); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + TLI.getValueType(DAG.getDataLayout(), I.getType()), + InVec, InIdx)); +} + +// Utility for visitShuffleVector - Return true if every element in Mask, +// beginning from position Pos and ending in Pos+Size, falls within the +// specified sequential range [L, L+Pos). or is undef. 
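+// For example, Mask = <4, 5, undef, 7> with Pos = 0, Size = 4 and Low = 4 is
+// accepted, since undef elements are not required to match.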
+static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, + unsigned Pos, unsigned Size, int Low) { + for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) + if (Mask[i] >= 0 && Mask[i] != Low) + return false; + return true; +} + +void SelectionDAGBuilder::visitShuffleVector(const User &I) { + SDValue Src1 = getValue(I.getOperand(0)); + SDValue Src2 = getValue(I.getOperand(1)); + + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); + unsigned MaskNumElts = Mask.size(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + EVT SrcVT = Src1.getValueType(); + unsigned SrcNumElts = SrcVT.getVectorNumElements(); + + if (SrcNumElts == MaskNumElts) { + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, + &Mask[0])); + return; + } + + // Normalize the shuffle vector since mask and vector length don't match. + if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { + // Mask is longer than the source vectors and is a multiple of the source + // vectors. We can use concatenate vector to make the mask and vectors + // lengths match. + if (SrcNumElts*2 == MaskNumElts) { + // First check for Src1 in low and Src2 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), + VT, Src1, Src2)); + return; + } + // Then check for Src2 in low and Src1 in high + if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && + isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { + // The shuffle is concatenating two vectors together. + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), + VT, Src2, Src1)); + return; + } + } + + // Pad both vectors with undefs to make them the same length as the mask. + unsigned NumConcat = MaskNumElts / SrcNumElts; + bool Src1U = Src1.getOpcode() == ISD::UNDEF; + bool Src2U = Src2.getOpcode() == ISD::UNDEF; + SDValue UndefVal = DAG.getUNDEF(SrcVT); + + SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); + SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); + MOps1[0] = Src1; + MOps2[0] = Src2; + + Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps1); + Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, + getCurSDLoc(), VT, MOps2); + + // Readjust mask for new input vector length. + SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx >= (int)SrcNumElts) + Idx -= SrcNumElts - MaskNumElts; + MappedOps.push_back(Idx); + } + + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + + if (SrcNumElts > MaskNumElts) { + // Analyze the access pattern of the vector to see if we can extract + // two subvectors and do the shuffle. The analysis is done by calculating + // the range of elements the mask access on both vectors. 
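+    // For instance, with two <8 x i32> sources and mask <4, 5, 6, 7>, only
+    // elements [4, 8) of the first source are used, so a single
+    // EXTRACT_SUBVECTOR at index 4 feeds the shuffle and the second source
+    // can be left undef.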
+ int MinRange[2] = { static_cast<int>(SrcNumElts), + static_cast<int>(SrcNumElts)}; + int MaxRange[2] = {-1, -1}; + + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + unsigned Input = 0; + if (Idx < 0) + continue; + + if (Idx >= (int)SrcNumElts) { + Input = 1; + Idx -= SrcNumElts; + } + if (Idx > MaxRange[Input]) + MaxRange[Input] = Idx; + if (Idx < MinRange[Input]) + MinRange[Input] = Idx; + } + + // Check if the access is smaller than the vector size and can we find + // a reasonable extract index. + int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not + // Extract. + int StartIdx[2]; // StartIdx to extract from + for (unsigned Input = 0; Input < 2; ++Input) { + if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { + RangeUse[Input] = 0; // Unused + StartIdx[Input] = 0; + continue; + } + + // Find a good start index that is a multiple of the mask length. Then + // see if the rest of the elements are in range. + StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; + if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && + StartIdx[Input] + MaskNumElts <= SrcNumElts) + RangeUse[Input] = 1; // Extract from a multiple of the mask length. + } + + if (RangeUse[0] == 0 && RangeUse[1] == 0) { + setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. + return; + } + if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { + // Extract appropriate subvector and generate a vector shuffle + for (unsigned Input = 0; Input < 2; ++Input) { + SDValue &Src = Input == 0 ? Src1 : Src2; + if (RangeUse[Input] == 0) + Src = DAG.getUNDEF(VT); + else { + SDLoc dl = getCurSDLoc(); + Src = DAG.getNode( + ISD::EXTRACT_SUBVECTOR, dl, VT, Src, + DAG.getConstant(StartIdx[Input], dl, + TLI.getVectorIdxTy(DAG.getDataLayout()))); + } + } + + // Calculate new mask. + SmallVector<int, 8> MappedOps; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + if (Idx >= 0) { + if (Idx < (int)SrcNumElts) + Idx -= StartIdx[0]; + else + Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; + } + MappedOps.push_back(Idx); + } + + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, + &MappedOps[0])); + return; + } + } + + // We can't use either concat vectors or extract subvectors so fall back to + // replacing the shuffle with extract and build vector. + // to insert and build vector. + EVT EltVT = VT.getVectorElementType(); + EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout()); + SDLoc dl = getCurSDLoc(); + SmallVector<SDValue,8> Ops; + for (unsigned i = 0; i != MaskNumElts; ++i) { + int Idx = Mask[i]; + SDValue Res; + + if (Idx < 0) { + Res = DAG.getUNDEF(EltVT); + } else { + SDValue &Src = Idx < (int)SrcNumElts ? 
Src1 : Src2; + if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; + + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + EltVT, Src, DAG.getConstant(Idx, dl, IdxVT)); + } + + Ops.push_back(Res); + } + + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops)); +} + +void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { + const Value *Op0 = I.getOperand(0); + const Value *Op1 = I.getOperand(1); + Type *AggTy = I.getType(); + Type *ValTy = Op1->getType(); + bool IntoUndef = isa<UndefValue>(Op0); + bool FromUndef = isa<UndefValue>(Op1); + + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> AggValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs); + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); + + unsigned NumAggValues = AggValueVTs.size(); + unsigned NumValValues = ValValueVTs.size(); + SmallVector<SDValue, 4> Values(NumAggValues); + + // Ignore an insertvalue that produces an empty object + if (!NumAggValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + + SDValue Agg = getValue(Op0); + unsigned i = 0; + // Copy the beginning value(s) from the original aggregate. + for (; i != LinearIndex; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + // Copy values from the inserted value(s). + if (NumValValues) { + SDValue Val = getValue(Op1); + for (; i != LinearIndex + NumValValues; ++i) + Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); + } + // Copy remaining value(s) from the original aggregate. + for (; i != NumAggValues; ++i) + Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(AggValueVTs), Values)); +} + +void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { + const Value *Op0 = I.getOperand(0); + Type *AggTy = Op0->getType(); + Type *ValTy = I.getType(); + bool OutOfUndef = isa<UndefValue>(Op0); + + unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs); + + unsigned NumValValues = ValValueVTs.size(); + + // Ignore a extractvalue that produces an empty object + if (!NumValValues) { + setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); + return; + } + + SmallVector<SDValue, 4> Values(NumValValues); + + SDValue Agg = getValue(Op0); + // Copy out the selected value(s). + for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) + Values[i - LinearIndex] = + OutOfUndef ? + DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : + SDValue(Agg.getNode(), Agg.getResNo() + i); + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), + DAG.getVTList(ValValueVTs), Values)); +} + +void SelectionDAGBuilder::visitGetElementPtr(const User &I) { + Value *Op0 = I.getOperand(0); + // Note that the pointer operand may be a vector of pointers. Take the scalar + // element which holds a pointer. + Type *Ty = Op0->getType()->getScalarType(); + unsigned AS = Ty->getPointerAddressSpace(); + SDValue N = getValue(Op0); + SDLoc dl = getCurSDLoc(); + + // Normalize Vector GEP - all scalar operands should be converted to the + // splat vector. 
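+  // e.g. a GEP producing <4 x i8*> from a scalar base pointer first
+  // replicates the base into a four-element BUILD_VECTOR so the address
+  // arithmetic below stays element-wise.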
+ unsigned VectorWidth = I.getType()->isVectorTy() ? + cast<VectorType>(I.getType())->getVectorNumElements() : 0; + + if (VectorWidth && !N.getValueType().isVector()) { + MVT VT = MVT::getVectorVT(N.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, N); + N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } + for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); + OI != E; ++OI) { + const Value *Idx = *OI; + if (StructType *StTy = dyn_cast<StructType>(Ty)) { + unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); + if (Field) { + // N = N + Offset + uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, + DAG.getConstant(Offset, dl, N.getValueType()), &Flags); + } + + Ty = StTy->getElementType(Field); + } else { + Ty = cast<SequentialType>(Ty)->getElementType(); + MVT PtrTy = + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS); + unsigned PtrSize = PtrTy.getSizeInBits(); + APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); + + // If this is a scalar constant or a splat vector of constants, + // handle it quickly. + const auto *CI = dyn_cast<ConstantInt>(Idx); + if (!CI && isa<ConstantDataVector>(Idx) && + cast<ConstantDataVector>(Idx)->getSplatValue()) + CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue()); + + if (CI) { + if (CI->isZero()) + continue; + APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); + SDValue OffsVal = VectorWidth ? + DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) : + DAG.getConstant(Offs, dl, PtrTy); + + // In an inbouds GEP with an offset that is nonnegative even when + // interpreted as signed, assume there is no unsigned overflow. + SDNodeFlags Flags; + if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds()) + Flags.setNoUnsignedWrap(true); + + N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, &Flags); + continue; + } + + // N = N + Idx * ElementSize; + SDValue IdxN = getValue(Idx); + + if (!IdxN.getValueType().isVector() && VectorWidth) { + MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth); + SmallVector<SDValue, 16> Ops(VectorWidth, IdxN); + IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); + } + // If the index is smaller or larger than intptr_t, truncate or extend + // it. + IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType()); + + // If this is a multiply by a power of two, turn it into a shl + // immediately. This is a very common case. + if (ElementSize != 1) { + if (ElementSize.isPowerOf2()) { + unsigned Amt = ElementSize.logBase2(); + IdxN = DAG.getNode(ISD::SHL, dl, + N.getValueType(), IdxN, + DAG.getConstant(Amt, dl, IdxN.getValueType())); + } else { + SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType()); + IdxN = DAG.getNode(ISD::MUL, dl, + N.getValueType(), IdxN, Scale); + } + } + + N = DAG.getNode(ISD::ADD, dl, + N.getValueType(), N, IdxN); + } + } + + setValue(&I, N); +} + +void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { + // If this is a fixed sized alloca in the entry block of the function, + // allocate it statically on the stack. 
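+  // Such allocas are recorded in FuncInfo.StaticAllocaMap, so getValue()
+  // will materialize them as a frame index; only dynamically sized (or
+  // non-entry-block) allocas fall through to the DYNAMIC_STACKALLOC below.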
+ if (FuncInfo.StaticAllocaMap.count(&I)) + return; // getValue will auto-populate this. + + SDLoc dl = getCurSDLoc(); + Type *Ty = I.getAllocatedType(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment()); + + SDValue AllocSize = getValue(I.getArraySize()); + + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr, + AllocSize, + DAG.getConstant(TySize, dl, IntPtr)); + + // Handle alignment. If the requested alignment is less than or equal to + // the stack alignment, ignore it. If the size is greater than or equal to + // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. + unsigned StackAlign = + DAG.getSubtarget().getFrameLowering()->getStackAlignment(); + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + AllocSize = DAG.getNode(ISD::ADD, dl, + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(StackAlign - 1, dl), &Flags); + + // Mask out the low bits for alignment purposes. + AllocSize = DAG.getNode(ISD::AND, dl, + AllocSize.getValueType(), AllocSize, + DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1), + dl)); + + SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) }; + SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops); + setValue(&I, DSA); + DAG.setRoot(DSA.getValue(1)); + + assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); +} + +void SelectionDAGBuilder::visitLoad(const LoadInst &I) { + if (I.isAtomic()) + return visitAtomicLoad(I); + + const Value *SV = I.getOperand(0); + SDValue Ptr = getValue(SV); + + Type *Ty = I.getType(); + + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; + + // The IR notion of invariant_load only guarantees that all *non-faulting* + // invariant loads result in the same value. The MI notion of invariant load + // guarantees that the load can be legally moved to any location within its + // containing function. The MI notion of invariant_load is stronger than the + // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load + // with a guarantee that the location being loaded from is dereferenceable + // throughout the function's lifetime. + + bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr && + isDereferenceablePointer(SV, DAG.getDataLayout()); + unsigned Alignment = I.getAlignment(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + SDValue Root; + bool ConstantMemory = false; + if (isVolatile || NumValues > MaxParallelChains) + // Serialize volatile loads with other side effects. 
+ Root = getRoot(); + else if (AA->pointsToConstantMemory(MemoryLocation( + SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = DAG.getRoot(); + } + + SDLoc dl = getCurSDLoc(); + + if (isVolatile) + Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG); + + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + SmallVector<SDValue, 4> Values(NumValues); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + EVT PtrVT = Ptr.getValueType(); + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // Serializing loads here may result in excessive register pressure, and + // TokenFactor places arbitrary choke points on the scheduler. SD scheduling + // could recover a bit by hoisting nodes upward in the chain by recognizing + // they are side-effect free or do not alias. The optimizer should really + // avoid this case by converting large object/array copies to llvm.memcpy + // (MaxParallelChains should always remain as failsafe). + if (ChainI == MaxParallelChains) { + assert(PendingLoads.empty() && "PendingLoads must be serialized first"); + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + Root = Chain; + ChainI = 0; + } + SDValue A = DAG.getNode(ISD::ADD, dl, + PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT), + &Flags); + SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, + A, MachinePointerInfo(SV, Offsets[i]), isVolatile, + isNonTemporal, isInvariant, Alignment, AAInfo, + Ranges); + + Values[i] = L; + Chains[ChainI] = L.getValue(1); + } + + if (!ConstantMemory) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + if (isVolatile) + DAG.setRoot(Chain); + else + PendingLoads.push_back(Chain); + } + + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(ValueVTs), Values)); +} + +void SelectionDAGBuilder::visitStore(const StoreInst &I) { + if (I.isAtomic()) + return visitAtomicStore(I); + + const Value *SrcV = I.getOperand(0); + const Value *PtrV = I.getOperand(1); + + SmallVector<EVT, 4> ValueVTs; + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), + SrcV->getType(), ValueVTs, &Offsets); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) + return; + + // Get the lowered operands. Note that we do this after + // checking if NumResults is zero, because with zero results + // the operands won't have values in the map. + SDValue Src = getValue(SrcV); + SDValue Ptr = getValue(PtrV); + + SDValue Root = getRoot(); + SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues)); + EVT PtrVT = Ptr.getValueType(); + bool isVolatile = I.isVolatile(); + bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; + unsigned Alignment = I.getAlignment(); + SDLoc dl = getCurSDLoc(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + // An aggregate load cannot wrap around the address space, so offsets to its + // parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + unsigned ChainI = 0; + for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { + // See visitLoad comments. 
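+    // (In short: cap the number of stores feeding one TokenFactor at
+    // MaxParallelChains and start a fresh chain when the cap is reached.)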
+ if (ChainI == MaxParallelChains) { + SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + Root = Chain; + ChainI = 0; + } + SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr, + DAG.getConstant(Offsets[i], dl, PtrVT), &Flags); + SDValue St = DAG.getStore(Root, dl, + SDValue(Src.getNode(), Src.getResNo() + i), + Add, MachinePointerInfo(PtrV, Offsets[i]), + isVolatile, isNonTemporal, Alignment, AAInfo); + Chains[ChainI] = St; + } + + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + makeArrayRef(Chains.data(), ChainI)); + DAG.setRoot(StoreNode); +} + +void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.store.*(Src0, Ptr, alignment, Mask) + Value *PtrOperand = I.getArgOperand(1); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, + MMO, false); + DAG.setRoot(StoreNode); + setValue(&I, StoreNode); +} + +// Get a uniform base for the Gather/Scatter intrinsic. +// The first argument of the Gather/Scatter intrinsic is a vector of pointers. +// We try to represent it as a base pointer + vector of indices. +// Usually, the vector of pointers comes from a 'getelementptr' instruction. +// The first operand of the GEP may be a single pointer or a vector of pointers +// Example: +// %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind +// or +// %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind +// %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, .. +// +// When the first GEP operand is a single pointer - it is the uniform base we +// are looking for. If first operand of the GEP is a splat vector - we +// extract the spalt value and use it as a uniform base. +// In all other cases the function returns 'false'. +// +static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index, + SelectionDAGBuilder* SDB) { + + SelectionDAG& DAG = SDB->DAG; + LLVMContext &Context = *DAG.getContext(); + + assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); + const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); + if (!GEP || GEP->getNumOperands() > 2) + return false; + + const Value *GEPPtr = GEP->getPointerOperand(); + if (!GEPPtr->getType()->isVectorTy()) + Ptr = GEPPtr; + else if (!(Ptr = getSplatValue(GEPPtr))) + return false; + + Value *IndexVal = GEP->getOperand(1); + + // The operands of the GEP may be defined in another basic block. + // In this case we'll not find nodes for the operands. + if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal)) + return false; + + Base = SDB->getValue(Ptr); + Index = SDB->getValue(IndexVal); + + // Suppress sign extension. 
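+  // If the index vector was produced by a sext whose operand is already
+  // available in the DAG, prefer the narrower pre-extension value; the
+  // narrower index vector is typically cheaper to legalize.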
+ if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) { + if (SDB->findValue(Sext->getOperand(0))) { + IndexVal = Sext->getOperand(0); + Index = SDB->getValue(IndexVal); + } + } + if (!Index.getValueType().isVector()) { + unsigned GEPWidth = GEP->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); + SmallVector<SDValue, 16> Ops(GEPWidth, Index); + Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops); + } + return true; +} + +void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask) + const Value *Ptr = I.getArgOperand(1); + SDValue Src0 = getValue(I.getArgOperand(0)); + SDValue Mask = getValue(I.getArgOperand(3)); + EVT VT = Src0.getValueType(); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + + SDValue Base; + SDValue Index; + const Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + + const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr; + MachineMemOperand *MMO = DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(MemOpBasePtr), + MachineMemOperand::MOStore, VT.getStoreSize(), + Alignment, AAInfo); + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + } + SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index }; + SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl, + Ops, MMO); + DAG.setRoot(Scatter); + setValue(&I, Scatter); +} + +void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) + Value *PtrOperand = I.getArgOperand(0); + SDValue Ptr = getValue(PtrOperand); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue InChain = DAG.getRoot(); + if (AA->pointsToConstantMemory(MemoryLocation( + PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + InChain = DAG.getEntryNode(); + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). 
+ getMachineMemOperand(MachinePointerInfo(PtrOperand), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, + ISD::NON_EXTLOAD); + SDValue OutChain = Load.getValue(1); + DAG.setRoot(OutChain); + setValue(&I, Load); +} + +void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { + SDLoc sdl = getCurSDLoc(); + + // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0) + const Value *Ptr = I.getArgOperand(0); + SDValue Src0 = getValue(I.getArgOperand(3)); + SDValue Mask = getValue(I.getArgOperand(2)); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); + if (!Alignment) + Alignment = DAG.getEVTAlignment(VT); + + AAMDNodes AAInfo; + I.getAAMetadata(AAInfo); + const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); + + SDValue Root = DAG.getRoot(); + SDValue Base; + SDValue Index; + const Value *BasePtr = Ptr; + bool UniformBase = getUniformBase(BasePtr, Base, Index, this); + bool ConstantMemory = false; + if (UniformBase && + AA->pointsToConstantMemory(MemoryLocation( + BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()), + AAInfo))) { + // Do not serialize (non-volatile) loads of constant memory with anything. + Root = DAG.getEntryNode(); + ConstantMemory = true; + } + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr), + MachineMemOperand::MOLoad, VT.getStoreSize(), + Alignment, AAInfo, Ranges); + + if (!UniformBase) { + Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout())); + Index = getValue(Ptr); + } + SDValue Ops[] = { Root, Src0, Mask, Base, Index }; + SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl, + Ops, MMO); + + SDValue OutChain = Gather.getValue(1); + if (!ConstantMemory) + PendingLoads.push_back(OutChain); + setValue(&I, Gather); +} + +void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { + SDLoc dl = getCurSDLoc(); + AtomicOrdering SuccessOrder = I.getSuccessOrdering(); + AtomicOrdering FailureOrder = I.getFailureOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + SDValue L = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), + /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); + + SDValue OutChain = L.getValue(2); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { + SDLoc dl = getCurSDLoc(); + ISD::NodeType NT; + switch (I.getOperation()) { + default: llvm_unreachable("Unknown atomicrmw operation"); + case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; + case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; + case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; + case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; + case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; + case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; + case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; + case AtomicRMWInst::Max: NT = 
ISD::ATOMIC_LOAD_MAX; break; + case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; + case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; + case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; + } + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + SDValue L = + DAG.getAtomic(NT, dl, + getValue(I.getValOperand()).getSimpleValueType(), + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValOperand()), + I.getPointerOperand(), + /* Alignment=*/ 0, Order, Scope); + + SDValue OutChain = L.getValue(1); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitFence(const FenceInst &I) { + SDLoc dl = getCurSDLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDValue Ops[3]; + Ops[0] = getRoot(); + Ops[1] = DAG.getConstant(I.getOrdering(), dl, + TLI.getPointerTy(DAG.getDataLayout())); + Ops[2] = DAG.getConstant(I.getSynchScope(), dl, + TLI.getPointerTy(DAG.getDataLayout())); + DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); +} + +void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { + SDLoc dl = getCurSDLoc(); + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + + if (I.getAlignment() < VT.getSizeInBits() / 8) + report_fatal_error("Cannot generate unaligned atomic load"); + + MachineMemOperand *MMO = + DAG.getMachineFunction(). + getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), + MachineMemOperand::MOVolatile | + MachineMemOperand::MOLoad, + VT.getStoreSize(), + I.getAlignment() ? I.getAlignment() : + DAG.getEVTAlignment(VT)); + + InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); + SDValue L = + DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, + getValue(I.getPointerOperand()), MMO, + Order, Scope); + + SDValue OutChain = L.getValue(1); + + setValue(&I, L); + DAG.setRoot(OutChain); +} + +void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { + SDLoc dl = getCurSDLoc(); + + AtomicOrdering Order = I.getOrdering(); + SynchronizationScope Scope = I.getSynchScope(); + + SDValue InChain = getRoot(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT VT = + TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType()); + + if (I.getAlignment() < VT.getSizeInBits() / 8) + report_fatal_error("Cannot generate unaligned atomic store"); + + SDValue OutChain = + DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, + InChain, + getValue(I.getPointerOperand()), + getValue(I.getValueOperand()), + I.getPointerOperand(), I.getAlignment(), + Order, Scope); + + DAG.setRoot(OutChain); +} + +/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC +/// node. +void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, + unsigned Intrinsic) { + bool HasChain = !I.doesNotAccessMemory(); + bool OnlyLoad = HasChain && I.onlyReadsMemory(); + + // Build the operand list. + SmallVector<SDValue, 8> Ops; + if (HasChain) { // If this intrinsic has side-effects, chainify it. + if (OnlyLoad) { + // We don't need to serialize loads against other loads. 
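+      // Chain off DAG.getRoot() directly; getRoot() would first fold all
+      // PendingLoads into a TokenFactor and serialize this node against them.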
+ Ops.push_back(DAG.getRoot()); + } else { + Ops.push_back(getRoot()); + } + } + + // Info is set by getTgtMemInstrinsic + TargetLowering::IntrinsicInfo Info; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + + // Add the intrinsic ID as an integer operand if it's not a target intrinsic. + if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || + Info.opc == ISD::INTRINSIC_W_CHAIN) + Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(), + TLI.getPointerTy(DAG.getDataLayout()))); + + // Add all operands of the call to the operand list. + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + SDValue Op = getValue(I.getArgOperand(i)); + Ops.push_back(Op); + } + + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + + if (HasChain) + ValueVTs.push_back(MVT::Other); + + SDVTList VTs = DAG.getVTList(ValueVTs); + + // Create the node. + SDValue Result; + if (IsTgtIntrinsic) { + // This is target intrinsic that touches memory + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), + VTs, Ops, Info.memVT, + MachinePointerInfo(Info.ptrVal, Info.offset), + Info.align, Info.vol, + Info.readMem, Info.writeMem, Info.size); + } else if (!HasChain) { + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); + } else if (!I.getType()->isVoidTy()) { + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); + } else { + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); + } + + if (HasChain) { + SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); + if (OnlyLoad) + PendingLoads.push_back(Chain); + else + DAG.setRoot(Chain); + } + + if (!I.getType()->isVoidTy()) { + if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { + EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy); + Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); + } + + setValue(&I, Result); + } +} + +/// GetSignificand - Get the significand and build it into a floating-point +/// number with exponent of 1: +/// +/// Op = (Op & 0x007fffff) | 0x3f800000; +/// +/// where Op is the hexadecimal representation of floating point value. +static SDValue +GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { + SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x007fffff, dl, MVT::i32)); + SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, + DAG.getConstant(0x3f800000, dl, MVT::i32)); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); +} + +/// GetExponent - Get the exponent: +/// +/// (float)(int)(((Op & 0x7f800000) >> 23) - 127); +/// +/// where Op is the hexadecimal representation of floating point value. +static SDValue +GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, + SDLoc dl) { + SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, + DAG.getConstant(0x7f800000, dl, MVT::i32)); + SDValue t1 = DAG.getNode( + ISD::SRL, dl, MVT::i32, t0, + DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout()))); + SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, + DAG.getConstant(127, dl, MVT::i32)); + return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); +} + +/// getF32Constant - Get 32-bit floating point constant. 
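+/// The value is given as its raw IEEE-754 single-precision bit pattern,
+/// e.g. 0x3f800000 is 1.0f and 0x3fb8aa3b is (approximately) log2(e).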
+static SDValue +getF32Constant(SelectionDAG &DAG, unsigned Flt, SDLoc dl) { + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl, + MVT::f32); +} + +static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, + SelectionDAG &DAG) { + // TODO: What fast-math-flags should be set on the floating-point nodes? + + // IntegerPartOfX = ((int32_t)(t0); + SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); + + // FractionalPartOfX = t0 - (float)IntegerPartOfX; + SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); + SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); + + // IntegerPartOfX <<= 23; + IntegerPartOfX = DAG.getNode( + ISD::SHL, dl, MVT::i32, IntegerPartOfX, + DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy( + DAG.getDataLayout()))); + + SDValue TwoToFractionalPartOfX; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // TwoToFractionalPartOfX = + // 0.997535578f + + // (0.735607626f + 0.252464424f * x) * x; + // + // error 0.0144103317, which is 6 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3e814304, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f3c50c8, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // TwoToFractionalPartOfX = + // 0.999892986f + + // (0.696457318f + + // (0.224338339f + 0.792043434e-1f * x) * x) * x; + // + // error 0.000107046256, which is 13 to 14 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3da235e3, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3e65b8f3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f324b07, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // TwoToFractionalPartOfX = + // 0.999999982f + + // (0.693148872f + + // (0.240227044f + + // (0.554906021e-1f + + // (0.961591928e-2f + + // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; + // error 2.47208000*10^(-7), which is better than 18 bits + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3924b03e, dl)); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3ab24b87, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3c1d8c17, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3d634a1d, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3e75fe14, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, + getF32Constant(DAG, 0x3f317234, dl)); + SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 
0x3f800000, dl)); + } + + // Add the exponent into the result in integer domain. + SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); +} + +/// expandExp - Lower an exp intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OFe 1.4426950f + // t0 = Op * LOG2OFe + + // TODO: What fast-math-flags should be set here? + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + getF32Constant(DAG, 0x3fb8aa3b, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); + } + + // No special expansion. + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); +} + +/// expandLog - Lower a log intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log(2) [0.69314718f]. + SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3f317218, dl)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + SDValue LogOfMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // LogofMantissa = + // -1.1609546f + + // (1.4034025f - 0.23903021f * x) * x; + // + // error 0.0034276066, which is better than 8 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbe74c456, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3fb3a2b1, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // LogOfMantissa = + // -1.7417939f + + // (2.8212026f + + // (-1.4699568f + + // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; + // + // error 0.000061011436, which is 14 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbd67b6d6, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ee4f4b8, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fbc278b, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40348e95, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // LogOfMantissa = + // -2.1072184f + + // (4.2372794f + + // (-3.7029485f + + // (2.2781945f + + // (-0.87823314f + + // 
(0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; + // + // error 0.0000023660568, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbc91e5ac, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e4350aa, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f60d3e3, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x4011cdf0, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x406cfd1c, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x408797cb, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab, dl)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); +} + +/// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Get the exponent. + SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + // Different possible minimax approximations of significand in + // floating-point for various degrees of accuracy over [1,2]. 
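+    // (GetSignificand forces the exponent field to 0, so X always lies in
+    // [1,2) and the polynomials only need to be accurate on that interval.)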
+ SDValue Log2ofMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; + // + // error 0.0049451742, which is more than 7 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbeb08fe0, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x40019463, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log2ofMantissa = + // -2.51285454f + + // (4.07009056f + + // (-2.12067489f + + // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; + // + // error 0.0000876136000, which is better than 13 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbda7262e, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f25280b, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x4007b923, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40823e2f, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log2ofMantissa = + // -3.0400495f + + // (6.1129976f + + // (-5.3420409f + + // (3.2865683f + + // (-1.2669343f + + // (0.27515199f - + // 0.25691327e-1f * x) * x) * x) * x) * x) * x; + // + // error 0.0000018516, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbcd2769e, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e8ce0b9, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fa22ae7, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x40525723, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x40aaf200, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, + getF32Constant(DAG, 0x40c39dad, dl)); + SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c, dl)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); +} + +/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + + // TODO: What fast-math-flags should be set on the floating-point nodes? + + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); + + // Scale the exponent by log10(2) [0.30102999f]. 
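+    // This uses log10(x) = exponent * log10(2) + log10(mantissa), with the
+    // mantissa taken from GetSignificand below and lying in [1,2).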
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl); + SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, + getF32Constant(DAG, 0x3e9a209a, dl)); + + // Get the significand and build it into a floating-point number with + // exponent of 1. + SDValue X = GetSignificand(DAG, Op1, dl); + + SDValue Log10ofMantissa; + if (LimitFloatPrecision <= 6) { + // For floating-point precision of 6: + // + // Log10ofMantissa = + // -0.50419619f + + // (0.60948995f - 0.10380950f * x) * x; + // + // error 0.0014886165, which is 6 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0xbdd49a13, dl)); + SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3f1c0789, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300, dl)); + } else if (LimitFloatPrecision <= 12) { + // For floating-point precision of 12: + // + // Log10ofMantissa = + // -0.64831180f + + // (0.91751397f + + // (-0.31664806f + 0.47637168e-1f * x) * x) * x; + // + // error 0.00019228036, which is better than 12 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3d431f31, dl)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3ea21fb2, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f6ae232, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3, dl)); + } else { // LimitFloatPrecision <= 18 + // For floating-point precision of 18: + // + // Log10ofMantissa = + // -0.84299375f + + // (1.5327582f + + // (-1.0688956f + + // (0.49102474f + + // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; + // + // error 0.0000037995730, which is better than 18 bits + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, + getF32Constant(DAG, 0x3c5d51ce, dl)); + SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, + getF32Constant(DAG, 0x3e00685a, dl)); + SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); + SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3efb6798, dl)); + SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); + SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f88d192, dl)); + SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); + SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fc4316c, dl)); + SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70, dl)); + } + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); + } + + // No special expansion. + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); +} + +/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// limited-precision mode. +static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) + return getLimitedPrecisionExp2(Op, dl, DAG); + + // No special expansion. + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); +} + +/// visitPow - Lower a pow intrinsic. Handles the special sequences for +/// limited-precision mode with x == 10.0f. 
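+/// In that case 10^y is rewritten as 2^(y * log2(10)) and lowered through
+/// getLimitedPrecisionExp2.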
+static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const TargetLowering &TLI) { + bool IsExp10 = false; + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && + LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { + if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { + APFloat Ten(10.0f); + IsExp10 = LHSC->isExactlyValue(Ten); + } + } + + // TODO: What fast-math-flags should be set on the FMUL node? + if (IsExp10) { + // Put the exponent in the right bit position for later addition to the + // final result: + // + // #define LOG2OF10 3.3219281f + // t0 = Op * LOG2OF10; + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, + getF32Constant(DAG, 0x40549a78, dl)); + return getLimitedPrecisionExp2(t0, dl, DAG); + } + + // No special expansion. + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); +} + + +/// ExpandPowI - Expand a llvm.powi intrinsic. +static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, + SelectionDAG &DAG) { + // If RHS is a constant, we can expand this out to a multiplication tree, + // otherwise we end up lowering to a call to __powidf2 (for example). When + // optimizing for size, we only want to do this if the expansion would produce + // a small number of multiplies, otherwise we do the full expansion. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { + // Get the exponent as a positive value. + unsigned Val = RHSC->getSExtValue(); + if ((int)Val < 0) Val = -Val; + + // powi(x, 0) -> 1.0 + if (Val == 0) + return DAG.getConstantFP(1.0, DL, LHS.getValueType()); + + const Function *F = DAG.getMachineFunction().getFunction(); + if (!F->optForSize() || + // If optimizing for size, don't insert too many multiplies. + // This inserts up to 5 multiplies. + countPopulation(Val) + Log2_32(Val) < 7) { + // We use the simple binary decomposition method to generate the multiply + // sequence. There are more optimal ways to do this (for example, + // powi(x,15) generates one more multiply than it should), but this has + // the benefit of being both really simple and much better than a libcall. + SDValue Res; // Logically starts equal to 1.0 + SDValue CurSquare = LHS; + // TODO: Intrinsics should have fast-math-flags that propagate to these + // nodes. + while (Val) { + if (Val & 1) { + if (Res.getNode()) + Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); + else + Res = CurSquare; // 1.0*CurSquare. + } + + CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), + CurSquare, CurSquare); + Val >>= 1; + } + + // If the original was negative, invert the result, producing 1/(x*x*x). + if (RHSC->getSExtValue() < 0) + Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), + DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res); + return Res; + } + } + + // Otherwise, expand to a libcall. + return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); +} + +// getUnderlyingArgReg - Find underlying register used for a truncated or +// bitcasted argument. +static unsigned getUnderlyingArgReg(const SDValue &N) { + switch (N.getOpcode()) { + case ISD::CopyFromReg: + return cast<RegisterSDNode>(N.getOperand(1))->getReg(); + case ISD::BITCAST: + case ISD::AssertZext: + case ISD::AssertSext: + case ISD::TRUNCATE: + return getUnderlyingArgReg(N.getOperand(0)); + default: + return 0; + } +} + +/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function +/// argument, create the corresponding DBG_VALUE machine instruction for it now. 
+/// At the end of instruction selection, they will be inserted to the entry BB. +bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( + const Value *V, DILocalVariable *Variable, DIExpression *Expr, + DILocation *DL, int64_t Offset, bool IsIndirect, const SDValue &N) { + const Argument *Arg = dyn_cast<Argument>(V); + if (!Arg) + return false; + + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Ignore inlined function arguments here. + // + // FIXME: Should we be checking DL->inlinedAt() to determine this? + if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction())) + return false; + + Optional<MachineOperand> Op; + // Some arguments' frame index is recorded during argument lowering. + if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + Op = MachineOperand::CreateFI(FI); + + if (!Op && N.getNode()) { + unsigned Reg = getUnderlyingArgReg(N); + if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + unsigned PR = RegInfo.getLiveInPhysReg(Reg); + if (PR) + Reg = PR; + } + if (Reg) + Op = MachineOperand::CreateReg(Reg, false); + } + + if (!Op) { + // Check if ValueMap has reg number. + DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); + if (VMI != FuncInfo.ValueMap.end()) + Op = MachineOperand::CreateReg(VMI->second, false); + } + + if (!Op && N.getNode()) + // Check if frame index is available. + if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) + if (FrameIndexSDNode *FINode = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); + + if (!Op) + return false; + + assert(Variable->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + if (Op->isReg()) + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + Op->getReg(), Offset, Variable, Expr)); + else + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op) + .addImm(Offset) + .addMetadata(Variable) + .addMetadata(Expr)); + + return true; +} + +// VisualStudio defines setjmp as _setjmp +#if defined(_MSC_VER) && defined(setjmp) && \ + !defined(setjmp_undefined_for_msvc) +# pragma push_macro("setjmp") +# undef setjmp +# define setjmp_undefined_for_msvc +#endif + +/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If +/// we want to emit this as a call to a named external function, return the name +/// otherwise lower it and return null. +const char * +SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc sdl = getCurSDLoc(); + DebugLoc dl = getCurDebugLoc(); + SDValue Res; + + switch (Intrinsic) { + default: + // By default, turn this into a target intrinsic node. 
+ visitTargetIntrinsic(I, Intrinsic); + return nullptr; + case Intrinsic::vastart: visitVAStart(I); return nullptr; + case Intrinsic::vaend: visitVAEnd(I); return nullptr; + case Intrinsic::vacopy: visitVACopy(I); return nullptr; + case Intrinsic::returnaddress: + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return nullptr; + case Intrinsic::frameaddress: + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return nullptr; + case Intrinsic::read_register: { + Value *Reg = I.getArgOperand(0); + SDValue Chain = getRoot(); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + Res = DAG.getNode(ISD::READ_REGISTER, sdl, + DAG.getVTList(VT, MVT::Other), Chain, RegName); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return nullptr; + } + case Intrinsic::write_register: { + Value *Reg = I.getArgOperand(0); + Value *RegValue = I.getArgOperand(1); + SDValue Chain = getRoot(); + SDValue RegName = + DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); + DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, + RegName, getValue(RegValue))); + return nullptr; + } + case Intrinsic::setjmp: + return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + case Intrinsic::longjmp: + return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + case Intrinsic::memcpy: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MC); + return nullptr; + } + case Intrinsic::memset: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. + bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + updateDAGForMaybeTailCall(MS); + return nullptr; + } + case Intrinsic::memmove: { + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 
+ bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + updateDAGForMaybeTailCall(MM); + return nullptr; + } + case Intrinsic::dbg_declare: { + const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); + const Value *Address = DI.getAddress(); + assert(Variable && "Missing variable"); + if (!Address) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return nullptr; + } + + // Check if address has undef value. + if (isa<UndefValue>(Address) || + (Address->use_empty() && !isa<Argument>(Address))) { + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + return nullptr; + } + + SDValue &N = NodeMap[Address]; + if (!N.getNode() && isa<Argument>(Address)) + // Check unused arguments map. + N = UnusedArgNodeMap[Address]; + SDDbgValue *SDV; + if (N.getNode()) { + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) + Address = BCI->getOperand(0); + // Parameters are handled specially. + bool isParameter = Variable->isParameter() || isa<Argument>(Address); + auto FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); + if (isParameter && FINode) { + // Byval parameter. We have a frame index at this point. + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, + FINode->getIndex(), 0, dl, SDNodeOrder); + } else if (isa<Argument>(Address)) { + // Address is an argument, so try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N); + return nullptr; + } else { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + true, 0, dl, SDNodeOrder); + } + DAG.AddDbgValue(SDV, N.getNode(), isParameter); + } else { + // If Address is an argument then try to emit its dbg value using + // virtual register info from the FuncInfo.ValueMap. + if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, 0, false, + N)) { + // If variable is pinned by a alloca in dominating bb then + // use StaticAllocaMap. + if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { + if (AI->getParent() != DI.getParent()) { + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) { + SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, + 0, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + return nullptr; + } + } + } + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + } + } + return nullptr; + } + case Intrinsic::dbg_value: { + const DbgValueInst &DI = cast<DbgValueInst>(I); + assert(DI.getVariable() && "Missing variable"); + + DILocalVariable *Variable = DI.getVariable(); + DIExpression *Expression = DI.getExpression(); + uint64_t Offset = DI.getOffset(); + const Value *V = DI.getValue(); + if (!V) + return nullptr; + + SDDbgValue *SDV; + if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { + SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, + SDNodeOrder); + DAG.AddDbgValue(SDV, nullptr, false); + } else { + // Do not use getValue() in here; we don't want to generate code at + // this point if it hasn't been done yet. 
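+      // A plain NodeMap lookup only observes nodes that already exist;
+      // getValue() would eagerly lower V just to describe it.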
+ SDValue N = NodeMap[V]; + if (!N.getNode() && isa<Argument>(V)) + // Check unused arguments map. + N = UnusedArgNodeMap[V]; + if (N.getNode()) { + if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset, + false, N)) { + SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), + false, Offset, dl, SDNodeOrder); + DAG.AddDbgValue(SDV, N.getNode(), false); + } + } else if (!V->use_empty() ) { + // Do not call getValue(V) yet, as we don't want to generate code. + // Remember it for later. + DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); + DanglingDebugInfoMap[V] = DDI; + } else { + // We may expand this to cover more cases. One case where we have no + // data available is an unreferenced parameter. + DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + } + } + + // Build a debug info table entry. + if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) + V = BCI->getOperand(0); + const AllocaInst *AI = dyn_cast<AllocaInst>(V); + // Don't handle byval struct arguments or VLAs, for example. + if (!AI) { + DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); + DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); + return nullptr; + } + DenseMap<const AllocaInst*, int>::iterator SI = + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) + return nullptr; // VLAs. + return nullptr; + } + + case Intrinsic::eh_typeid_for: { + // Find the type id for the given typeinfo. + GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); + unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); + Res = DAG.getConstant(TypeID, sdl, MVT::i32); + setValue(&I, Res); + return nullptr; + } + + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + DAG.getMachineFunction().getMMI().setCallsEHReturn(true); + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, + MVT::Other, + getControlRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::eh_unwind_init: + DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); + return nullptr; + case Intrinsic::eh_dwarf_cfa: { + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, + TLI.getPointerTy(DAG.getDataLayout())); + SDValue Offset = DAG.getNode(ISD::ADD, sdl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, + CfaArg.getValueType()), + CfaArg); + SDValue FA = DAG.getNode( + ISD::FRAMEADDR, sdl, TLI.getPointerTy(DAG.getDataLayout()), + DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()))); + setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), + FA, Offset)); + return nullptr; + } + case Intrinsic::eh_sjlj_callsite: { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); + assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); + assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); + + MMI.setCurrentCallSite(CI->getZExtValue()); + return nullptr; + } + case Intrinsic::eh_sjlj_functioncontext: { + // Get and store the index of the function context. 
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + AllocaInst *FnCtx = + cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); + int FI = FuncInfo.StaticAllocaMap[FnCtx]; + MFI->setFunctionContextIndex(FI); + return nullptr; + } + case Intrinsic::eh_sjlj_setjmp: { + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, + DAG.getVTList(MVT::i32, MVT::Other), Ops); + setValue(&I, Op.getValue(0)); + DAG.setRoot(Op.getValue(1)); + return nullptr; + } + case Intrinsic::eh_sjlj_longjmp: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, + getRoot(), getValue(I.getArgOperand(0)))); + return nullptr; + } + case Intrinsic::eh_sjlj_setup_dispatch: { + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other, + getRoot())); + return nullptr; + } + + case Intrinsic::masked_gather: + visitMaskedGather(I); + return nullptr; + case Intrinsic::masked_load: + visitMaskedLoad(I); + return nullptr; + case Intrinsic::masked_scatter: + visitMaskedScatter(I); + return nullptr; + case Intrinsic::masked_store: + visitMaskedStore(I); + return nullptr; + case Intrinsic::x86_mmx_pslli_w: + case Intrinsic::x86_mmx_pslli_d: + case Intrinsic::x86_mmx_pslli_q: + case Intrinsic::x86_mmx_psrli_w: + case Intrinsic::x86_mmx_psrli_d: + case Intrinsic::x86_mmx_psrli_q: + case Intrinsic::x86_mmx_psrai_w: + case Intrinsic::x86_mmx_psrai_d: { + SDValue ShAmt = getValue(I.getArgOperand(1)); + if (isa<ConstantSDNode>(ShAmt)) { + visitTargetIntrinsic(I, Intrinsic); + return nullptr; + } + unsigned NewIntrinsic = 0; + EVT ShAmtVT = MVT::v2i32; + switch (Intrinsic) { + case Intrinsic::x86_mmx_pslli_w: + NewIntrinsic = Intrinsic::x86_mmx_psll_w; + break; + case Intrinsic::x86_mmx_pslli_d: + NewIntrinsic = Intrinsic::x86_mmx_psll_d; + break; + case Intrinsic::x86_mmx_pslli_q: + NewIntrinsic = Intrinsic::x86_mmx_psll_q; + break; + case Intrinsic::x86_mmx_psrli_w: + NewIntrinsic = Intrinsic::x86_mmx_psrl_w; + break; + case Intrinsic::x86_mmx_psrli_d: + NewIntrinsic = Intrinsic::x86_mmx_psrl_d; + break; + case Intrinsic::x86_mmx_psrli_q: + NewIntrinsic = Intrinsic::x86_mmx_psrl_q; + break; + case Intrinsic::x86_mmx_psrai_w: + NewIntrinsic = Intrinsic::x86_mmx_psra_w; + break; + case Intrinsic::x86_mmx_psrai_d: + NewIntrinsic = Intrinsic::x86_mmx_psra_d; + break; + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + } + + // The vector shift intrinsics with scalars uses 32b shift amounts but + // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits + // to be zero. + // We must do this early because v2i32 is not a legal type. 
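+    // Illustrative example: a non-constant shift amount %n for
+    // x86_mmx_pslli_w becomes the v2i32 vector <%n, 0>, is bitcast to the
+    // 64-bit result type, and is then fed to the register form
+    // x86_mmx_psll_w via INTRINSIC_WO_CHAIN.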
+ SDValue ShOps[2]; + ShOps[0] = ShAmt; + ShOps[1] = DAG.getConstant(0, sdl, MVT::i32); + ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, + DAG.getConstant(NewIntrinsic, sdl, MVT::i32), + getValue(I.getArgOperand(0)), ShAmt); + setValue(&I, Res); + return nullptr; + } + case Intrinsic::convertff: + case Intrinsic::convertfsi: + case Intrinsic::convertfui: + case Intrinsic::convertsif: + case Intrinsic::convertuif: + case Intrinsic::convertss: + case Intrinsic::convertsu: + case Intrinsic::convertus: + case Intrinsic::convertuu: { + ISD::CvtCode Code = ISD::CVT_INVALID; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. + case Intrinsic::convertff: Code = ISD::CVT_FF; break; + case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; + case Intrinsic::convertfui: Code = ISD::CVT_FU; break; + case Intrinsic::convertsif: Code = ISD::CVT_SF; break; + case Intrinsic::convertuif: Code = ISD::CVT_UF; break; + case Intrinsic::convertss: Code = ISD::CVT_SS; break; + case Intrinsic::convertsu: Code = ISD::CVT_SU; break; + case Intrinsic::convertus: Code = ISD::CVT_US; break; + case Intrinsic::convertuu: Code = ISD::CVT_UU; break; + } + EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + const Value *Op1 = I.getArgOperand(0); + Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), + DAG.getValueType(DestVT), + DAG.getValueType(getValue(Op1).getValueType()), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + Code); + setValue(&I, Res); + return nullptr; + } + case Intrinsic::powi: + setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); + return nullptr; + case Intrinsic::log: + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return nullptr; + case Intrinsic::log2: + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return nullptr; + case Intrinsic::log10: + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return nullptr; + case Intrinsic::exp: + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return nullptr; + case Intrinsic::exp2: + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); + return nullptr; + case Intrinsic::pow: + setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, TLI)); + return nullptr; + case Intrinsic::sqrt: + case Intrinsic::fabs: + case Intrinsic::sin: + case Intrinsic::cos: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + case Intrinsic::round: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; + } + + setValue(&I, DAG.getNode(Opcode, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; + } + case Intrinsic::minnum: + setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::maxnum: + setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return nullptr; + case Intrinsic::fma: + setValue(&I, DAG.getNode(ISD::FMA, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + return nullptr; + case Intrinsic::fmuladd: { + EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); + if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && + TLI.isFMAFasterThanFMulAndFAdd(VT)) { + setValue(&I, DAG.getNode(ISD::FMA, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)))); + } else { + // TODO: Intrinsic calls should have fast-math-flags. 
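+      // Without a profitable FMA, fall back to an explicit multiply followed
+      // by an add: a * b + c is lowered as FMUL then FADD.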
+ SDValue Mul = DAG.getNode(ISD::FMUL, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); + SDValue Add = DAG.getNode(ISD::FADD, sdl, + getValue(I.getArgOperand(0)).getValueType(), + Mul, + getValue(I.getArgOperand(2))); + setValue(&I, Add); + } + return nullptr; + } + case Intrinsic::convert_to_fp16: + setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, + DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, + getValue(I.getArgOperand(0)), + DAG.getTargetConstant(0, sdl, + MVT::i32)))); + return nullptr; + case Intrinsic::convert_from_fp16: + setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl, + TLI.getValueType(DAG.getDataLayout(), I.getType()), + DAG.getNode(ISD::BITCAST, sdl, MVT::f16, + getValue(I.getArgOperand(0))))); + return nullptr; + case Intrinsic::pcmarker: { + SDValue Tmp = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); + return nullptr; + } + case Intrinsic::readcyclecounter: { + SDValue Op = getRoot(); + Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, + DAG.getVTList(MVT::i64, MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return nullptr; + } + case Intrinsic::bitreverse: + setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; + case Intrinsic::bswap: + setValue(&I, DAG.getNode(ISD::BSWAP, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); + return nullptr; + case Intrinsic::cttz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, + sdl, Ty, Arg)); + return nullptr; + } + case Intrinsic::ctlz: { + SDValue Arg = getValue(I.getArgOperand(0)); + ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, + sdl, Ty, Arg)); + return nullptr; + } + case Intrinsic::ctpop: { + SDValue Arg = getValue(I.getArgOperand(0)); + EVT Ty = Arg.getValueType(); + setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); + return nullptr; + } + case Intrinsic::stacksave: { + SDValue Op = getRoot(); + Res = DAG.getNode( + ISD::STACKSAVE, sdl, + DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op); + setValue(&I, Res); + DAG.setRoot(Res.getValue(1)); + return nullptr; + } + case Intrinsic::stackrestore: { + Res = getValue(I.getArgOperand(0)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); + return nullptr; + } + case Intrinsic::get_dynamic_area_offset: { + SDValue Op = getRoot(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType()); + // Result type for @llvm.get.dynamic.area.offset should match PtrTy for + // target. + if (PtrTy != ResTy) + report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset" + " intrinsic!"); + Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy), + Op); + DAG.setRoot(Op); + setValue(&I, Res); + return nullptr; + } + case Intrinsic::stackprotector: { + // Emit code into the DAG to store the stack guard onto the stack. 
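+    // If the target can materialize the guard with LOAD_STACK_GUARD (and the
+    // source really is the __stack_chk_guard global, possibly behind a
+    // bitcast), emit that pseudo and keep the value in a virtual register so
+    // it can be retrieved again in the epilogue; otherwise simply load the
+    // first operand. Either way the guard is stored to the stack protector
+    // frame slot recorded below.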
+ MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout()); + SDValue Src, Chain = getRoot(); + const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); + const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); + + // See if Ptr is a bitcast. If it is, look through it and see if we can get + // global variable __stack_chk_guard. + if (!GV) + if (const Operator *BC = dyn_cast<Operator>(Ptr)) + if (BC->getOpcode() == Instruction::BitCast) + GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); + + if (GV && TLI.useLoadStackGuardNode()) { + // Emit a LOAD_STACK_GUARD node. + MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, + sdl, PtrTy, Chain); + MachinePointerInfo MPInfo(GV); + MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); + unsigned Flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOInvariant; + *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, + PtrTy.getSizeInBits() / 8, + DAG.getEVTAlignment(PtrTy)); + Node->setMemRefs(MemRefs, MemRefs + 1); + + // Copy the guard value to a virtual register so that it can be + // retrieved in the epilogue. + Src = SDValue(Node, 0); + const TargetRegisterClass *RC = + TLI.getRegClassFor(Src.getSimpleValueType()); + unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); + + SPDescriptor.setGuardReg(Reg); + Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); + } else { + Src = getValue(I.getArgOperand(0)); // The guard's value. + } + + AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); + + int FI = FuncInfo.StaticAllocaMap[Slot]; + MFI->setStackProtectorIndex(FI); + + SDValue FIN = DAG.getFrameIndex(FI, PtrTy); + + // Store the stack protector onto the stack. + Res = DAG.getStore(Chain, sdl, Src, FIN, MachinePointerInfo::getFixedStack( + DAG.getMachineFunction(), FI), + true, false, 0); + setValue(&I, Res); + DAG.setRoot(Res); + return nullptr; + } + case Intrinsic::objectsize: { + // If we don't know by now, we're never going to know. 
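+    // An llvm.objectsize that survives to instruction selection is folded to
+    // its conservative answer: -1 when the second (i1) argument is 0, and 0
+    // otherwise.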
+ ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); + + assert(CI && "Non-constant type in __builtin_object_size?"); + + SDValue Arg = getValue(I.getCalledValue()); + EVT Ty = Arg.getValueType(); + + if (CI->isZero()) + Res = DAG.getConstant(-1ULL, sdl, Ty); + else + Res = DAG.getConstant(0, sdl, Ty); + + setValue(&I, Res); + return nullptr; + } + case Intrinsic::annotation: + case Intrinsic::ptr_annotation: + // Drop the intrinsic, but forward the value + setValue(&I, getValue(I.getOperand(0))); + return nullptr; + case Intrinsic::assume: + case Intrinsic::var_annotation: + // Discard annotate attributes and assumptions + return nullptr; + + case Intrinsic::init_trampoline: { + const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); + + SDValue Ops[6]; + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); + Ops[5] = DAG.getSrcValue(F); + + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); + + DAG.setRoot(Res); + return nullptr; + } + case Intrinsic::adjust_trampoline: { + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, + TLI.getPointerTy(DAG.getDataLayout()), + getValue(I.getArgOperand(0)))); + return nullptr; + } + case Intrinsic::gcroot: + if (GFI) { + const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); + const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); + + FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); + GFI->addStackRoot(FI->getIndex(), TypeMap); + } + return nullptr; + case Intrinsic::gcread: + case Intrinsic::gcwrite: + llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); + case Intrinsic::flt_rounds: + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); + return nullptr; + + case Intrinsic::expect: { + // Just replace __builtin_expect(exp, c) with EXP. + setValue(&I, getValue(I.getArgOperand(0))); + return nullptr; + } + + case Intrinsic::debugtrap: + case Intrinsic::trap: { + StringRef TrapFuncName = + I.getAttributes() + .getAttribute(AttributeSet::FunctionIndex, "trap-func-name") + .getValueAsString(); + if (TrapFuncName.empty()) { + ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? + ISD::TRAP : ISD::DEBUGTRAP; + DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); + return nullptr; + } + TargetLowering::ArgListTy Args; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(sdl).setChain(getRoot()).setCallee( + CallingConv::C, I.getType(), + DAG.getExternalSymbol(TrapFuncName.data(), + TLI.getPointerTy(DAG.getDataLayout())), + std::move(Args), 0); + + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + DAG.setRoot(Result.second); + return nullptr; + } + + case Intrinsic::uadd_with_overflow: + case Intrinsic::sadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: { + ISD::NodeType Op; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; + case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; + case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; + case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; + case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; + case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; + } + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + + SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); + setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); + return nullptr; + } + case Intrinsic::prefetch: { + SDValue Ops[5]; + unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); + Ops[0] = getRoot(); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = getValue(I.getArgOperand(3)); + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, + DAG.getVTList(MVT::Other), Ops, + EVT::getIntegerVT(*Context, 8), + MachinePointerInfo(I.getArgOperand(0)), + 0, /* align */ + false, /* volatile */ + rw==0, /* read */ + rw==1)); /* write */ + return nullptr; + } + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: { + bool IsStart = (Intrinsic == Intrinsic::lifetime_start); + // Stack coloring is not enabled in O0, discard region information. + if (TM.getOptLevel() == CodeGenOpt::None) + return nullptr; + + SmallVector<Value *, 4> Allocas; + GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); + + for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { + AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); + + // Could not find an Alloca. + if (!LifetimeObject) + continue; + + // First check that the Alloca is static, otherwise it won't have a + // valid frame index. + auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); + if (SI == FuncInfo.StaticAllocaMap.end()) + return nullptr; + + int FI = SI->second; + + SDValue Ops[2]; + Ops[0] = getRoot(); + Ops[1] = + DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()), true); + unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); + + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); + DAG.setRoot(Res); + } + return nullptr; + } + case Intrinsic::invariant_start: + // Discard region information. + setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout()))); + return nullptr; + case Intrinsic::invariant_end: + // Discard region information. + return nullptr; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. 
+ (void)getControlRoot(); + return nullptr; + } + case Intrinsic::clear_cache: + return TLI.getClearCacheBuiltinName(); + case Intrinsic::donothing: + // ignore + return nullptr; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return nullptr; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(&I); + return nullptr; + } + case Intrinsic::experimental_gc_statepoint: { + visitStatepoint(I); + return nullptr; + } + case Intrinsic::experimental_gc_result: { + visitGCResult(I); + return nullptr; + } + case Intrinsic::experimental_gc_relocate: { + visitGCRelocate(cast<GCRelocateInst>(I)); + return nullptr; + } + case Intrinsic::instrprof_increment: + llvm_unreachable("instrprof failed to lower an increment"); + case Intrinsic::instrprof_value_profile: + llvm_unreachable("instrprof failed to lower a value profiling call"); + case Intrinsic::localescape: { + MachineFunction &MF = DAG.getMachineFunction(); + const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); + + // Directly emit some LOCAL_ESCAPE machine instrs. Label assignment emission + // is the same on all targets. + for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { + Value *Arg = I.getArgOperand(Idx)->stripPointerCasts(); + if (isa<ConstantPointerNull>(Arg)) + continue; // Skip null pointers. They represent a hole in index space. + AllocaInst *Slot = cast<AllocaInst>(Arg); + assert(FuncInfo.StaticAllocaMap.count(Slot) && + "can only escape static allocas"); + int FI = FuncInfo.StaticAllocaMap[Slot]; + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(MF.getName()), Idx); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, + TII->get(TargetOpcode::LOCAL_ESCAPE)) + .addSym(FrameAllocSym) + .addFrameIndex(FI); + } + + return nullptr; + } + + case Intrinsic::localrecover: { + // i8* @llvm.localrecover(i8* %fn, i8* %fp, i32 %idx) + MachineFunction &MF = DAG.getMachineFunction(); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout(), 0); + + // Get the symbol that defines the frame offset. + auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); + auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); + unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); + MCSymbol *FrameAllocSym = + MF.getMMI().getContext().getOrCreateFrameAllocSymbol( + GlobalValue::getRealLinkageName(Fn->getName()), IdxVal); + + // Create a MCSymbol for the label to avoid any target lowering + // that would make this PC relative. + SDValue OffsetSym = DAG.getMCSymbol(FrameAllocSym, PtrVT); + SDValue OffsetVal = + DAG.getNode(ISD::LOCAL_RECOVER, sdl, PtrVT, OffsetSym); + + // Add the offset to the FP. + Value *FP = I.getArgOperand(1); + SDValue FPVal = getValue(FP); + SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); + setValue(&I, Add); + + return nullptr; + } + + case Intrinsic::eh_exceptionpointer: + case Intrinsic::eh_exceptioncode: { + // Get the exception pointer vreg, copy from it, and resize it to fit. 
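+    // Both intrinsics read the exception pointer out of the virtual register
+    // associated with the catchpad; eh.exceptioncode additionally
+    // zero-extends or truncates the value to i32.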
+ const auto *CPI = cast<CatchPadInst>(I.getArgOperand(0)); + MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); + const TargetRegisterClass *PtrRC = TLI.getRegClassFor(PtrVT); + unsigned VReg = FuncInfo.getCatchPadExceptionPointerVReg(CPI, PtrRC); + SDValue N = + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), VReg, PtrVT); + if (Intrinsic == Intrinsic::eh_exceptioncode) + N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32); + setValue(&I, N); + return nullptr; + } + } +} + +std::pair<SDValue, SDValue> +SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB) { + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + MCSymbol *BeginLabel = nullptr; + + if (EHPadBB) { + // Insert a label before the invoke call to mark the try range. This can be + // used to detect deletion of the invoke via the MachineModuleInfo. + BeginLabel = MMI.getContext().createTempSymbol(); + + // For SjLj, keep track of which landing pads go with which invokes + // so as to maintain the ordering of pads in the LSDA. + unsigned CallSiteIndex = MMI.getCurrentCallSite(); + if (CallSiteIndex) { + MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); + LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex); + + // Now that the call site is handled, stop tracking it. + MMI.setCurrentCallSite(0); + } + + // Both PendingLoads and PendingExports must be flushed here; + // this call might not return. + (void)getRoot(); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); + + CLI.setChain(getRoot()); + } + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); + + assert((CLI.IsTailCall || Result.second.getNode()) && + "Non-null chain expected with non-tail call!"); + assert((Result.second.getNode() || !Result.first.getNode()) && + "Null value expected with tail call!"); + + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. + HasTailCall = true; + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. + PendingExports.clear(); + } else { + DAG.setRoot(Result.second); + } + + if (EHPadBB) { + // Insert a label at the end of the invoke call to mark the try range. This + // can be used to detect deletion of the invoke via the MachineModuleInfo. + MCSymbol *EndLabel = MMI.getContext().createTempSymbol(); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); + + // Inform MachineModuleInfo of range. 
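+    // With funclet-based EH the [BeginLabel, EndLabel) range is recorded
+    // against the invoke in WinEHFuncInfo; otherwise it is registered as a
+    // regular invoke range for the landing pad.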
+ if (MMI.hasEHFunclets()) { + assert(CLI.CS); + WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo(); + EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()), + BeginLabel, EndLabel); + } else { + MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel); + } + } + + return Result; +} + +void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, + bool isTailCall, + const BasicBlock *EHPadBB) { + PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); + FunctionType *FTy = cast<FunctionType>(PT->getElementType()); + Type *RetTy = FTy->getReturnType(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + Args.reserve(CS.arg_size()); + + for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); + i != e; ++i) { + const Value *V = *i; + + // Skip empty types + if (V->getType()->isEmptyTy()) + continue; + + SDValue ArgNode = getValue(V); + Entry.Node = ArgNode; Entry.Ty = V->getType(); + + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); + Args.push_back(Entry); + + // If we have an explicit sret argument that is an Instruction, (i.e., it + // might point to function-local memory), we can't meaningfully tail-call. + if (Entry.isSRet && isa<Instruction>(V)) + isTailCall = false; + } + + // Check if target-independent constraints permit a tail call here. + // Target-dependent constraints are checked within TLI->LowerCallTo. + if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) + isTailCall = false; + + TargetLowering::CallLoweringInfo CLI(DAG); + CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) + .setCallee(RetTy, FTy, Callee, std::move(Args), CS) + .setTailCall(isTailCall); + std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB); + + if (Result.first.getNode()) + setValue(CS.getInstruction(), Result.first); +} + +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { + for (const User *U : V->users()) { + if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) + if (IC->isEquality()) + if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, + Type *LoadTy, + SelectionDAGBuilder &Builder) { + + // Check to see if this load can be trivially constant folded, e.g. if the + // input is from a string literal. + if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { + // Cast pointer to the type we really want to load. + LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), + PointerType::getUnqual(LoadTy)); + + if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( + const_cast<Constant *>(LoadInput), *Builder.DL)) + return Builder.getValue(LoadCst); + } + + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; + + // Do not serialize (non-volatile) loads of constant memory with anything. + if (Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. 
+ Root = Builder.DAG.getRoot(); + } + + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, + Ptr, MachinePointerInfo(PtrVal), + false /*volatile*/, + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; +} + +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} + +/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); + if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(), + I.getType(), true); + setValue(&I, DAG.getConstant(0, getCurSDLoc(), CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { + bool ActuallyDoIt = true; + MVT LoadVT; + Type *LoadTy; + switch (CSize->getZExtValue()) { + default: + LoadVT = MVT::Other; + LoadTy = nullptr; + ActuallyDoIt = false; + break; + case 2: + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(CSize->getContext()); + break; + case 4: + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(CSize->getContext()); + break; + case 8: + LoadVT = MVT::i64; + LoadTy = Type::getInt64Ty(CSize->getContext()); + break; + /* + case 16: + LoadVT = MVT::v4i32; + LoadTy = Type::getInt32Ty(CSize->getContext()); + LoadTy = VectorType::get(LoadTy, 4); + break; + */ + } + + // This turns into unaligned loads. We only do this if the target natively + // supports the MVT we'll be loading or if it is small enough (<= 4) that + // we'll only produce a small number of byte loads. + + // Require that we can find a legal MVT, and only do this if the target + // supports unaligned loads of that type. Expanding into byte loads would + // bloat the code. 
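+  // Illustrative example: memcmp(p, q, 4) used only in an equality test can
+  // become a SETNE of two (possibly unaligned) i32 loads, provided i32 loads
+  // are legal and the target tolerates misaligned accesses of that type.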
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (ActuallyDoIt && CSize->getZExtValue() > 4) { + unsigned DstAS = LHS->getType()->getPointerAddressSpace(); + unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); + // TODO: Handle 5 byte compare as 4-byte + 1 byte. + // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. + // TODO: Check alignment of src and dest ptrs. + if (!TLI.isTypeLegal(LoadVT) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || + !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) + ActuallyDoIt = false; + } + + if (ActuallyDoIt) { + SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); + SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); + + SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, + ISD::SETNE); + processIntegerCallValue(I, Res, false); + return true; + } + } + + + return false; +} + +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. 
int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair<SDValue, SDValue> Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitUnaryFloatCall - If a call instruction is a unary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a unary floating-point call. + if (I.getNumArgOperands() != 1 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp = getValue(I.getArgOperand(0)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); + return true; +} + +/// visitBinaryFloatCall - If a call instruction is a binary floating-point +/// operation (as expected), translate it to an SDNode with the specified opcode +/// and return true. +bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, + unsigned Opcode) { + // Sanity check that it really is a binary floating-point call. 
+ if (I.getNumArgOperands() != 2 || + !I.getArgOperand(0)->getType()->isFloatingPointTy() || + I.getType() != I.getArgOperand(0)->getType() || + I.getType() != I.getArgOperand(1)->getType() || + !I.onlyReadsMemory()) + return false; + + SDValue Tmp0 = getValue(I.getArgOperand(0)); + SDValue Tmp1 = getValue(I.getArgOperand(1)); + EVT VT = Tmp0.getValueType(); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); + return true; +} + +void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa<InlineAsm>(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + + MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); + ComputeUsesVAFloatArgument(I, &MMI); + + const char *RenameFn = nullptr; + if (Function *F = I.getCalledFunction()) { + if (F->isDeclaration()) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { + if (unsigned IID = II->getIntrinsicID(F)) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + if (Intrinsic::ID IID = F->getIntrinsicID()) { + RenameFn = visitIntrinsicCall(I, IID); + if (!RenameFn) + return; + } + } + + // Check for well-known libc/libm calls. If the function is internal, it + // can't be a library call. + LibFunc::Func Func; + if (!F->hasLocalLinkage() && F->hasName() && + LibInfo->getLibFunc(F->getName(), Func) && + LibInfo->hasOptimizedCodeGen(Func)) { + switch (Func) { + default: break; + case LibFunc::copysign: + case LibFunc::copysignf: + case LibFunc::copysignl: + if (I.getNumArgOperands() == 2 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.getType() == I.getArgOperand(1)->getType() && + I.onlyReadsMemory()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), + LHS.getValueType(), LHS, RHS)); + return; + } + break; + case LibFunc::fabs: + case LibFunc::fabsf: + case LibFunc::fabsl: + if (visitUnaryFloatCall(I, ISD::FABS)) + return; + break; + case LibFunc::fmin: + case LibFunc::fminf: + case LibFunc::fminl: + if (visitBinaryFloatCall(I, ISD::FMINNUM)) + return; + break; + case LibFunc::fmax: + case LibFunc::fmaxf: + case LibFunc::fmaxl: + if (visitBinaryFloatCall(I, ISD::FMAXNUM)) + return; + break; + case LibFunc::sin: + case LibFunc::sinf: + case LibFunc::sinl: + if (visitUnaryFloatCall(I, ISD::FSIN)) + return; + break; + case LibFunc::cos: + case LibFunc::cosf: + case LibFunc::cosl: + if (visitUnaryFloatCall(I, ISD::FCOS)) + return; + break; + case LibFunc::sqrt: + case LibFunc::sqrtf: + case LibFunc::sqrtl: + case LibFunc::sqrt_finite: + case LibFunc::sqrtf_finite: + case LibFunc::sqrtl_finite: + if (visitUnaryFloatCall(I, ISD::FSQRT)) + return; + break; + case LibFunc::floor: + case LibFunc::floorf: + case LibFunc::floorl: + if (visitUnaryFloatCall(I, ISD::FFLOOR)) + return; + break; + case LibFunc::nearbyint: + case LibFunc::nearbyintf: + case LibFunc::nearbyintl: + if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) + return; + break; + case LibFunc::ceil: + case LibFunc::ceilf: + case LibFunc::ceill: + if (visitUnaryFloatCall(I, ISD::FCEIL)) + return; + break; + case LibFunc::rint: + case LibFunc::rintf: + case LibFunc::rintl: + if (visitUnaryFloatCall(I, ISD::FRINT)) + return; + break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; + case LibFunc::trunc: + case LibFunc::truncf: 
+ case LibFunc::truncl: + if (visitUnaryFloatCall(I, ISD::FTRUNC)) + return; + break; + case LibFunc::log2: + case LibFunc::log2f: + case LibFunc::log2l: + if (visitUnaryFloatCall(I, ISD::FLOG2)) + return; + break; + case LibFunc::exp2: + case LibFunc::exp2f: + case LibFunc::exp2l: + if (visitUnaryFloatCall(I, ISD::FEXP2)) + return; + break; + case LibFunc::memcmp: + if (visitMemCmpCall(I)) + return; + break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; + } + } + } + + SDValue Callee; + if (!RenameFn) + Callee = getValue(I.getCalledValue()); + else + Callee = DAG.getExternalSymbol( + RenameFn, + DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout())); + + // Check if we can potentially perform a tail call. More detailed checking is + // be done within LowerCallTo, after more information about the call is known. + LowerCallTo(&I, Callee, I.isTailCall()); +} + +namespace { + +/// AsmOperandInfo - This contains information for each constraint that we are +/// lowering. +class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { +public: + /// CallOperand - If this is the result output operand or a clobber + /// this is null, otherwise it is the incoming operand to the CallInst. + /// This gets modified as the asm is processed. + SDValue CallOperand; + + /// AssignedRegs - If this is a register or register class operand, this + /// contains the set of register corresponding to the operand. + RegsForValue AssignedRegs; + + explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) + : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { + } + + /// getCallOperandValEVT - Return the EVT of the Value* that this operand + /// corresponds to. If there is no Value* for this operand, it returns + /// MVT::Other. + EVT getCallOperandValEVT(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL) const { + if (!CallOperandVal) return MVT::Other; + + if (isa<BasicBlock>(CallOperandVal)) + return TLI.getPointerTy(DL); + + llvm::Type *OpTy = CallOperandVal->getType(); + + // FIXME: code duplicated from TargetLowering::ParseConstraints(). + // If this is an indirect operand, the operand is a pointer to the + // accessed type. + if (isIndirect) { + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. 
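+    // For instance, an 8-byte struct handed to a register constraint is
+    // tiled as a single i64 by the switch below.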
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = DL.getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpTy = IntegerType::get(Context, BitSize); + break; + } + } + + return TLI.getValueType(DL, OpTy, true); + } +}; + +typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; + +} // end anonymous namespace + +/// GetRegistersForValue - Assign registers (virtual or physical) for the +/// specified operand. We prefer to assign virtual registers, to allow the +/// register allocator to handle the assignment process. However, if the asm +/// uses features that we can't model on machineinstrs, we have SDISel do the +/// allocation. This produces generally horrible, but correct, code. +/// +/// OpInfo describes the operand. +/// +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + SDLoc DL, + SDISelAsmOperandInfo &OpInfo) { + LLVMContext &Context = *DAG.getContext(); + + MachineFunction &MF = DAG.getMachineFunction(); + SmallVector<unsigned, 4> Regs; + + // If this is a constraint for a single physreg, or a constraint for a + // register class, find it. + std::pair<unsigned, const TargetRegisterClass *> PhysReg = + TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), + OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + + unsigned NumRegs = 1; + if (OpInfo.ConstraintVT != MVT::Other) { + // If this is a FP input in an integer register (or visa versa) insert a bit + // cast of the input value. More generally, handle any case where the input + // value disagrees with the register class we plan to stick this in. + if (OpInfo.Type == InlineAsm::isInput && + PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { + // Try to convert to the first EVT that the reg class contains. If the + // types are identical size, use a bitcast to convert (e.g. two differing + // vector types). + MVT RegVT = *PhysReg.second->vt_begin(); + if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { + // If the input is a FP value and we want it in FP registers, do a + // bitcast to the corresponding integer type. This turns an f64 value + // into i64, which can be passed with two i32 values on a 32-bit + // machine. + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, + RegVT, OpInfo.CallOperand); + OpInfo.ConstraintVT = RegVT; + } + } + + NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); + } + + MVT RegVT; + EVT ValueVT = OpInfo.ConstraintVT; + + // If this is a constraint for a specific physical register, like {r17}, + // assign it now. + if (unsigned AssignedReg = PhysReg.first) { + const TargetRegisterClass *RC = PhysReg.second; + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = *RC->vt_begin(); + + // Get the actual register value type. This is important, because the user + // may have asked for (e.g.) the AX register in i32 type. We need to + // remember that AX is actually i16 to get the right extension. + RegVT = *RC->vt_begin(); + + // This is a explicit reference to a physical register. + Regs.push_back(AssignedReg); + + // If this is an expanded reference, add the rest of the regs to Regs. 
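+    // A constraint that names one physical register but needs more than one
+    // register's worth of value also claims the registers that follow it in
+    // the class's order.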
+ if (NumRegs != 1) { + TargetRegisterClass::iterator I = RC->begin(); + for (; *I != AssignedReg; ++I) + assert(I != RC->end() && "Didn't find reg!"); + + // Already added the first reg. + --NumRegs; ++I; + for (; NumRegs; --NumRegs, ++I) { + assert(I != RC->end() && "Ran out of registers to allocate!"); + Regs.push_back(*I); + } + } + + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return; + } + + // Otherwise, if this was a reference to an LLVM register class, create vregs + // for this reference. + if (const TargetRegisterClass *RC = PhysReg.second) { + RegVT = *RC->vt_begin(); + if (OpInfo.ConstraintVT == MVT::Other) + ValueVT = RegVT; + + // Create the appropriate number of virtual registers. + MachineRegisterInfo &RegInfo = MF.getRegInfo(); + for (; NumRegs; --NumRegs) + Regs.push_back(RegInfo.createVirtualRegister(RC)); + + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); + return; + } + + // Otherwise, we couldn't allocate enough registers for this. +} + +/// visitInlineAsm - Handle a call to an InlineAsm object. +/// +void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + + /// ConstraintOperands - Information about all of the constraints. + SDISelAsmOperandInfoVector ConstraintOperands; + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints( + DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS); + + bool hasMemory = false; + + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); + SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); + + MVT OpVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), + STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + // If this is an input or an indirect output, process the call argument. + // BasicBlocks are labels, currently appearing only in asm's. + if (OpInfo.CallOperandVal) { + if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { + OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); + } else { + OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); + } + + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, + DAG.getDataLayout()).getSimpleVT(); + } + + OpInfo.ConstraintVT = OpVT; + + // Indirect operand accesses access memory. 
+ if (OpInfo.isIndirect) + hasMemory = true; + else { + for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { + TargetLowering::ConstraintType + CType = TLI.getConstraintType(OpInfo.Codes[j]); + if (CType == TargetLowering::C_Memory) { + hasMemory = true; + break; + } + } + } + } + + SDValue Chain, Flag; + + // We won't need to flush pending loads if this asm doesn't touch + // memory and is nonvolatile. + if (hasMemory || IA->hasSideEffects()) + Chain = getRoot(); + else + Chain = DAG.getRoot(); + + // Second pass over the constraints: compute which constraint option to use + // and assign registers to constraints that want a specific physreg. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. + if (OpInfo.hasMatchingInput()) { + SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + Input.ConstraintVT = OpInfo.ConstraintVT; + } + } + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is a memory input, and if the operand is not indirect, do what we + // need to to provide an address for the memory input. + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + !OpInfo.isIndirect) { + assert((OpInfo.isMultipleAlternative || + (OpInfo.Type == InlineAsm::isInput)) && + "Can only indirectify direct input operands!"); + + // Memory operands really want the address of the value. If we don't have + // an indirect input, put it in the constpool if we can, otherwise spill + // it to a stack slot. + // TODO: This isn't quite right. We need to handle these according to + // the addressing mode that the constraint wants. Also, this may take + // an additional register for the computation and we don't want that + // either. + + // If the operand is a float, integer, or vector constant, spill to a + // constant pool entry to get its address. + const Value *OpVal = OpInfo.CallOperandVal; + if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || + isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { + OpInfo.CallOperand = DAG.getConstantPool( + cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout())); + } else { + // Otherwise, create a stack slot and emit a store to it before the + // asm. 
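+        // Non-constant values get a fresh stack object; the store to it is
+        // chained in before the asm and the operand is rewritten to the
+        // frame index, after which it is treated as indirect.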
+ Type *Ty = OpVal->getType(); + auto &DL = DAG.getDataLayout(); + uint64_t TySize = DL.getTypeAllocSize(Ty); + unsigned Align = DL.getPrefTypeAlignment(Ty); + MachineFunction &MF = DAG.getMachineFunction(); + int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + SDValue StackSlot = + DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout())); + Chain = DAG.getStore( + Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), + false, false, 0); + OpInfo.CallOperand = StackSlot; + } + + // There is no longer a Value* corresponding to this operand. + OpInfo.CallOperandVal = nullptr; + + // It is now an indirect operand. + OpInfo.isIndirect = true; + } + + // If this constraint is for a specific register, allocate it before + // anything else. + if (OpInfo.ConstraintType == TargetLowering::C_Register) + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + } + + // Second pass - Loop over all of the operands, assigning virtual or physregs + // to register class operands. + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + // C_Register operands have already been allocated, Other/Memory don't need + // to be. + if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) + GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); + } + + // AsmNodeOperands - The operands for the ISD::INLINEASM node. + std::vector<SDValue> AsmNodeOperands; + AsmNodeOperands.push_back(SDValue()); // reserve space for input chain + AsmNodeOperands.push_back(DAG.getTargetExternalSymbol( + IA->getAsmString().c_str(), TLI.getPointerTy(DAG.getDataLayout()))); + + // If we have a !srcloc metadata node associated with it, we want to attach + // this to the ultimately generated inline asm machineinstr. To do this, we + // pass in the third operand as this (potentially null) inline asm MDNode. + const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); + AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + + // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore + // bits as operand 3. + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + // Set the asm dialect. + ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; + + // Determine if this InlineAsm MayLoad or MayStore based on the constraints. + for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { + TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; + + // Compute the constraint code and ConstraintType to use. + TLI.ComputeConstraintToUse(OpInfo, SDValue()); + + // Ideally, we would only check against memory constraints. However, the + // meaning of an other constraint can be target-specific and we can't easily + // reason about it. Therefore, be conservative and set MayLoad/MayStore + // for other constriants as well. 
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory || + OpInfo.ConstraintType == TargetLowering::C_Other) { + if (OpInfo.Type == InlineAsm::isInput) + ExtraInfo |= InlineAsm::Extra_MayLoad; + else if (OpInfo.Type == InlineAsm::isOutput) + ExtraInfo |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); + } + } + + AsmNodeOperands.push_back(DAG.getTargetConstant( + ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + + // Loop over all of the inputs, copying the operand values into the + // appropriate registers and processing the output regs. + RegsForValue RetValRegs; + + // IndirectStoresToEmit - The set of stores to emit after the inline asm node. + std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; + + for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { + SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; + + switch (OpInfo.Type) { + case InlineAsm::isOutput: { + if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && + OpInfo.ConstraintType != TargetLowering::C_Register) { + // Memory output, or 'other' output (e.g. 'X' constraint). + assert(OpInfo.isIndirect && "Memory output must be indirect operand"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + // Add information to the INLINEASM node to know about this output. + unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, getCurSDLoc(), + MVT::i32)); + AsmNodeOperands.push_back(OpInfo.CallOperand); + break; + } + + // Otherwise, this is a register or register class output. + + // Copy the output from the appropriate register. Find a register that + // we can use. + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate output register for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + // If this is an indirect operand, store through the pointer after the + // asm. + if (OpInfo.isIndirect) { + IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, + OpInfo.CallOperandVal)); + } else { + // This is the result value of the call. + assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); + // Concatenate this output onto the outputs list. + RetValRegs.append(OpInfo.AssignedRegs); + } + + // Add information to the INLINEASM node to know that this register is + // set. + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, getCurSDLoc(), DAG, AsmNodeOperands); + break; + } + case InlineAsm::isInput: { + SDValue InOperandVal = OpInfo.CallOperand; + + if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? + // If this is required to match an output register we have already set, + // just use its register. + unsigned OperandNo = OpInfo.getMatchedOperand(); + + // Scan until we find the definition we already emitted of this operand. + // When we find it, create a RegsForValue operand. + unsigned CurOp = InlineAsm::Op_FirstOperand; + for (; OperandNo; --OperandNo) { + // Advance to the next operand. 
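+ // Illustrative note (editorial, not in the upstream source): every operand
+ // group in AsmNodeOperands begins with a flag word followed by its register
+ // (or memory) operands, so skipping a group means reading the flag word and
+ // stepping over getNumOperandRegisters(OpFlag) + 1 nodes, as done below.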
+ unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + assert((InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag) || + InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); + CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; + } + + unsigned OpFlag = + cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); + if (InlineAsm::isRegDefKind(OpFlag) || + InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { + // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. + if (OpInfo.isIndirect) { + // This happens on gcc/testsuite/gcc.dg/pr8788-1.c + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; + } + + RegsForValue MatchedRegs; + MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); + MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); + MatchedRegs.RegVTs.push_back(RegVT); + MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); + for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); + i != e; ++i) { + if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); + else { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" + " type register class is not natively supported!"); + return; + } + } + SDLoc dl = getCurSDLoc(); + // Use the produced MatchedRegs object to + MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl, + Chain, &Flag, CS.getInstruction()); + MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, + true, OpInfo.getMatchedOperand(), dl, + DAG, AsmNodeOperands); + break; + } + + assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); + assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && + "Unexpected number of operands"); + // Add information to the INLINEASM node to know about this input. + // See InlineAsm.h isUseOperandTiedToDef. + OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); + OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, + OpInfo.getMatchedOperand()); + AsmNodeOperands.push_back(DAG.getTargetConstant( + OpFlag, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); + break; + } + + // Treat indirect 'X' constraint as memory. + if (OpInfo.ConstraintType == TargetLowering::C_Other && + OpInfo.isIndirect) + OpInfo.ConstraintType = TargetLowering::C_Memory; + + if (OpInfo.ConstraintType == TargetLowering::C_Other) { + std::vector<SDValue> Ops; + TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); + if (Ops.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "invalid operand for inline asm constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + // Add information to the INLINEASM node to know about this input. 
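+ // Illustrative note (editorial, not in the upstream source): for example, an
+ // "i" (immediate) constraint whose operand folds to a constant is expanded
+ // by LowerAsmOperandForConstraint into target constants, which are recorded
+ // below under a Kind_Imm flag word covering Ops.size() operands.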
+ unsigned ResOpType = + InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); + AsmNodeOperands.push_back(DAG.getTargetConstant( + ResOpType, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout()))); + AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); + break; + } + + if (OpInfo.ConstraintType == TargetLowering::C_Memory) { + assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); + assert(InOperandVal.getValueType() == + TLI.getPointerTy(DAG.getDataLayout()) && + "Memory operands expect pointer values"); + + unsigned ConstraintID = + TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); + assert(ConstraintID != InlineAsm::Constraint_Unknown && + "Failed to convert memory constraint code to constraint id."); + + // Add information to the INLINEASM node to know about this input. + unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); + ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); + AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, + getCurSDLoc(), + MVT::i32)); + AsmNodeOperands.push_back(InOperandVal); + break; + } + + assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || + OpInfo.ConstraintType == TargetLowering::C_Register) && + "Unknown constraint type!"); + + // TODO: Support this. + if (OpInfo.isIndirect) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "Don't know how to handle indirect register inputs yet " + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + // Copy the input into the appropriate registers. + if (OpInfo.AssignedRegs.Regs.empty()) { + LLVMContext &Ctx = *DAG.getContext(); + Ctx.emitError(CS.getInstruction(), + "couldn't allocate input reg for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; + } + + SDLoc dl = getCurSDLoc(); + + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, dl, + Chain, &Flag, CS.getInstruction()); + + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, + dl, DAG, AsmNodeOperands); + break; + } + case InlineAsm::isClobber: { + // Add the clobbered value to the operand list, so that the register + // allocator is aware that the physreg got clobbered. + if (!OpInfo.AssignedRegs.Regs.empty()) + OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, + false, 0, getCurSDLoc(), DAG, + AsmNodeOperands); + break; + } + } + } + + // Finish up input operands. Set the input chain and add the flag last. + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; + if (Flag.getNode()) AsmNodeOperands.push_back(Flag); + + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), + DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); + Flag = Chain.getValue(1); + + // If this asm returns a register value, copy the result from that register + // and set it as the value of the call. + if (!RetValRegs.Regs.empty()) { + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), + Chain, &Flag, CS.getInstruction()); + + // FIXME: Why don't we do this for inline asms with MRVs? + if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { + EVT ResultType = TLI.getValueType(DAG.getDataLayout(), CS.getType()); + + // If any of the results of the inline asm is a vector, it may have the + // wrong width/num elts. This can happen for register classes that can + // contain multiple different value types. The preg or vreg allocated may + // not have the same VT as was expected. Convert it to the right type + // with bit_convert. 
+ if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), + ResultType, Val); + + } else if (ResultType != Val.getValueType() && + ResultType.isInteger() && Val.getValueType().isInteger()) { + // If a result value was tied to an input value, the computed result may + // have a wider width than the expected result. Extract the relevant + // portion. + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); + } + + assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); + } + + setValue(CS.getInstruction(), Val); + // Don't need to use this as a chain in this case. + if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) + return; + } + + std::vector<std::pair<SDValue, const Value *> > StoresToEmit; + + // Process indirect outputs, first output all of the flagged copies out of + // physregs. + for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { + RegsForValue &OutRegs = IndirectStoresToEmit[i].first; + const Value *Ptr = IndirectStoresToEmit[i].second; + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), + Chain, &Flag, IA); + StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); + } + + // Emit the non-flagged stores from the physregs. + SmallVector<SDValue, 8> OutChains; + for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), + StoresToEmit[i].first, + getValue(StoresToEmit[i].second), + MachinePointerInfo(StoresToEmit[i].second), + false, false, 0); + OutChains.push_back(Val); + } + + if (!OutChains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); + + DAG.setRoot(Chain); +} + +void SelectionDAGBuilder::visitVAStart(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); + SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()), + getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), + DAG.getSrcValue(I.getOperand(0)), + DL.getABITypeAlignment(I.getType())); + setValue(&I, V); + DAG.setRoot(V.getValue(1)); +} + +void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); +} + +void SelectionDAGBuilder::visitVACopy(const CallInst &I) { + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), + MVT::Other, getRoot(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); +} + +/// \brief Lower an argument list according to the target calling convention. +/// +/// \return A tuple of <return-value, token-chain> +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. 
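+///
+/// (Illustrative note, editorial:) visitPatchpoint below uses this helper
+/// with ArgIdx/NumArgs selecting only the real call arguments, skipping the
+/// <id>, <numBytes>, <target> and <numArgs> meta-operands that precede them.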
+std::pair<SDValue, SDValue> SelectionDAGBuilder::lowerCallOperands(
+ ImmutableCallSite CS, unsigned ArgIdx, unsigned NumArgs, SDValue Callee,
+ Type *ReturnTy, const BasicBlock *EHPadBB, bool IsPatchPoint) {
+ TargetLowering::ArgListTy Args;
+ Args.reserve(NumArgs);
+
+ // Populate the argument list.
+ // Attributes for args start at offset 1, after the return attribute.
+ for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1;
+ ArgI != ArgE; ++ArgI) {
+ const Value *V = CS->getOperand(ArgI);
+
+ assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
+
+ TargetLowering::ArgListEntry Entry;
+ Entry.Node = getValue(V);
+ Entry.Ty = V->getType();
+ Entry.setAttributes(&CS, AttrI);
+ Args.push_back(Entry);
+ }
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot())
+ .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args), NumArgs)
+ .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint);
+
+ return lowerInvokable(CLI, EHPadBB);
+}
+
+/// \brief Add a stack map intrinsic call's live variable operands to a stackmap
+/// or patchpoint target node's operand list.
+///
+/// Constants are converted to TargetConstants purely as an optimization to
+/// avoid constant materialization and register allocation.
+///
+/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
+/// generate address computation nodes, and so ExpandISelPseudo can convert the
+/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
+/// address materialization and register allocation, but may also be required
+/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
+/// alloca in the entry block, then the runtime may assume that the alloca's
+/// StackMap location can be read immediately after compilation and that the
+/// location is valid at any point during execution (this is similar to the
+/// assumption made by the llvm.gcroot intrinsic). If the alloca's location were
+/// only available in a register, then the runtime would need to trap when
+/// execution reaches the StackMap in order to read the alloca's location.
+static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx,
+ SDLoc DL, SmallVectorImpl<SDValue> &Ops,
+ SelectionDAGBuilder &Builder) {
+ for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) {
+ SDValue OpVal = Builder.getValue(CS.getArgument(i));
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) {
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(StackMaps::ConstantOp, DL, MVT::i64));
+ Ops.push_back(
+ Builder.DAG.getTargetConstant(C->getSExtValue(), DL, MVT::i64));
+ } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) {
+ const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo();
+ Ops.push_back(Builder.DAG.getTargetFrameIndex(
+ FI->getIndex(), TLI.getPointerTy(Builder.DAG.getDataLayout())));
+ } else
+ Ops.push_back(OpVal);
+ }
+}
+
+/// \brief Lower llvm.experimental.stackmap directly to its target opcode.
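+///
+/// (Illustrative note, editorial:) for example
+///   call void @llvm.experimental.stackmap(i64 42, i32 8, i32 %x)
+/// records ID 42 and 8 shadow bytes and keeps %x recorded as a live value;
+/// no actual call is emitted.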
+void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
+ // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>,
+ // [live variables...])
+
+ assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value.");
+
+ SDValue Chain, InFlag, Callee, NullPtr;
+ SmallVector<SDValue, 32> Ops;
+
+ SDLoc DL = getCurSDLoc();
+ Callee = getValue(CI.getCalledValue());
+ NullPtr = DAG.getIntPtrConstant(0, DL, true);
+
+ // The stackmap intrinsic only records the live variables (the arguments
+ // passed to it) and emits NOPs (if requested). Unlike the patchpoint
+ // intrinsic, this won't be lowered to a function call. This means we don't
+ // have to worry about calling conventions and target specific lowering code.
+ // Instead we perform the call lowering right here.
+ //
+ // chain, flag = CALLSEQ_START(chain, 0)
+ // chain, flag = STACKMAP(id, nbytes, ..., chain, flag)
+ // chain, flag = CALLSEQ_END(chain, 0, 0, flag)
+ //
+ Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL);
+ InFlag = Chain.getValue(1);
+
+ // Add the <id> and <numBytes> constants.
+ SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(IDVal)->getZExtValue(), DL, MVT::i64));
+ SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos));
+ Ops.push_back(DAG.getTargetConstant(
+ cast<ConstantSDNode>(NBytesVal)->getZExtValue(), DL,
+ MVT::i32));
+
+ // Push live variables for the stack map.
+ addStackMapLiveVars(&CI, 2, DL, Ops, *this);
+
+ // We are not pushing any register mask info here on the operands list,
+ // because the stackmap doesn't clobber anything.
+
+ // Push the chain and the glue flag.
+ Ops.push_back(Chain);
+ Ops.push_back(InFlag);
+
+ // Create the STACKMAP node.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops);
+ Chain = SDValue(SM, 0);
+ InFlag = Chain.getValue(1);
+
+ Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL);
+
+ // Stackmaps don't generate values, so nothing goes into the NodeMap.
+
+ // Set the root to the target-lowered call chain.
+ DAG.setRoot(Chain);
+
+ // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap();
+}
+
+/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
+void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
+ const BasicBlock *EHPadBB) {
+ // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>,
+ // i32 <numBytes>,
+ // i8* <target>,
+ // i32 <numArgs>,
+ // [Args...],
+ // [live variables...])
+
+ CallingConv::ID CC = CS.getCallingConv();
+ bool IsAnyRegCC = CC == CallingConv::AnyReg;
+ bool HasDef = !CS->getType()->isVoidTy();
+ SDLoc dl = getCurSDLoc();
+ SDValue Callee = getValue(CS->getOperand(PatchPointOpers::TargetPos));
+
+ // Handle immediate and symbolic callees.
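+ // Illustrative note (editorial, not in the upstream source): a <target> such
+ // as "i8* null" is already a ConstantSDNode and becomes a target intptr
+ // constant, while a direct function symbol becomes a TargetGlobalAddress,
+ // as handled just below.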
+ if (auto* ConstCallee = dyn_cast<ConstantSDNode>(Callee)) + Callee = DAG.getIntPtrConstant(ConstCallee->getZExtValue(), dl, + /*isTarget=*/true); + else if (auto* SymbolicCallee = dyn_cast<GlobalAddressSDNode>(Callee)) + Callee = DAG.getTargetGlobalAddress(SymbolicCallee->getGlobal(), + SDLoc(SymbolicCallee), + SymbolicCallee->getValueType(0)); + + // Get the real number of arguments participating in the call <numArgs> + SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); + unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); + + // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> + // Intrinsics include all meta-operands up to but not including CC. + unsigned NumMetaOpers = PatchPointOpers::CCPos; + assert(CS.arg_size() >= NumMetaOpers + NumArgs && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; + Type *ReturnTy = + IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); + std::pair<SDValue, SDValue> Result = lowerCallOperands( + CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy, EHPadBB, true); + + SDNode *CallEnd = Result.second.getNode(); + if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool HasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector<SDValue, 8> Ops; + + // Add the <id> and <numBytes> constants. + SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(IDVal)->getZExtValue(), dl, MVT::i64)); + SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(DAG.getTargetConstant( + cast<ConstantSDNode>(NBytesVal)->getZExtValue(), dl, + MVT::i32)); + + // Add the callee. + Ops.push_back(Callee); + + // Adjust <numArgs> to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); + NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, dl, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, dl, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (IsAnyRegCC) + for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) + Ops.push_back(getValue(CS.getArgument(i))); + + // Push the arguments from the call instruction up to the register mask. + SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; + Ops.append(Call->op_begin() + 2, e); + + // Push live variables for the stack map. + addStackMapLiveVars(CS, NumMetaOpers + NumArgs, dl, Ops, *this); + + // Push the register mask info. + if (HasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). 
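+ // Illustrative note (editorial, not in the upstream source): at this point
+ // Ops holds <id>, <numBytes>, the callee, <numCallRegArgs>, the calling
+ // convention, the register call arguments, the stack map live values, the
+ // register mask and the chain; the glue, if present, is appended last below.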
+ if (HasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (IsAnyRegCC && HasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector<EVT, 3> ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), CS->getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + dl, NodeTys, Ops); + + // Update the NodeMap. + if (HasDef) { + if (IsAnyRegCC) + setValue(CS.getInstruction(), SDValue(MN, 0)); + else + setValue(CS.getInstruction(), Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (IsAnyRegCC && HasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); + + // Inform the Frame Information that we have a patchpoint in this function. + FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); +} + +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + +/// TargetLowering::LowerCallTo - This is the default LowerCallTo +/// implementation, which just calls LowerCall. +/// FIXME: When all targets are +/// migrated to using LowerCall, this hook should be integrated into SDISel. +std::pair<SDValue, SDValue> +TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { + // Handle the incoming return values from the call. + CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; + SmallVector<EVT, 4> RetTys; + SmallVector<uint64_t, 4> Offsets; + auto &DL = CLI.DAG.getDataLayout(); + ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this, DL); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? 
+ // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy); + unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL)); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + + // sret demotion isn't compatible with tail-calls, since the sret argument + // points into the callers stack frame. + CLI.IsTailCall = false; + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } + } + } + + // Handle all of the outgoing arguments. + CLI.Outs.clear(); + CLI.OutVals.clear(); + ArgListTy &Args = CLI.getArgs(); + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs); + Type *FinalType = Args[i].Ty; + if (Args[i].isByVal) + FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); + bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( + FinalType, CLI.CallConv, CLI.IsVarArg); + for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; + ++Value) { + EVT VT = ValueVTs[Value]; + Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); + SDValue Op = SDValue(Args[i].Node.getNode(), + Args[i].Node.getResNo() + Value); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + if (Args[i].isZExt) + Flags.setZExt(); + if (Args[i].isSExt) + Flags.setSExt(); + if (Args[i].isInReg) + Flags.setInReg(); + if (Args[i].isSRet) + Flags.setSRet(); + if (Args[i].isByVal) + Flags.setByVal(); + if (Args[i].isInAlloca) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (Args[i].isByVal || Args[i].isInAlloca) { + PointerType *Ty = cast<PointerType>(Args[i].Ty); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + // For ByVal, alignment should come from FE. BE will guess if this + // info is not there but there are cases it cannot get right. 
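+ // Illustrative note (editorial, not in the upstream source): a byval
+ // argument that carries an explicit alignment from the front end keeps it
+ // here; otherwise the target's getByValTypeAlignment() estimate is used, as
+ // the code below shows.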
+ unsigned FrameAlign; + if (Args[i].Alignment) + FrameAlign = Args[i].Alignment; + else + FrameAlign = getByValTypeAlignment(ElementTy, DL); + Flags.setByValAlign(FrameAlign); + } + if (Args[i].isNest) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + Flags.setOrigAlign(OriginalAlignment); + + MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); + SmallVector<SDValue, 4> Parts(NumParts); + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (Args[i].isSExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (Args[i].isZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // Conservatively only handle 'returned' on non-vectors for now + if (Args[i].isReturned && !Op.getValueType().isVector()) { + assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && + "unexpected use of 'returned'"); + // Before passing 'returned' to the target lowering code, ensure that + // either the register MVT and the actual EVT are the same size or that + // the return value and argument are extended in the same way; in these + // cases it's safe to pass the argument register value unchanged as the + // return register value (although it's at the target's option whether + // to do so) + // TODO: allow code generation to take advantage of partially preserved + // registers rather than clobbering the entire register when the + // parameter extension method is not compatible with the return + // extension method + if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || + (ExtendKind != ISD::ANY_EXTEND && + CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) + Flags.setReturned(); + } + + getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, + CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); + + for (unsigned j = 0; j != NumParts; ++j) { + // if it isn't first piece, alignment must be 1 + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, + i < CLI.NumFixedArgs, + i, j*Parts[j].getValueType().getStoreSize()); + if (NumParts > 1 && j == 0) + MyFlags.Flags.setSplit(); + else if (j != 0) { + MyFlags.Flags.setOrigAlign(1); + if (j == NumParts - 1) + MyFlags.Flags.setSplitEnd(); + } + + CLI.Outs.push_back(MyFlags); + CLI.OutVals.push_back(Parts[j]); + } + + if (NeedsRegBlock && Value == NumValues - 1) + CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); + } + } + + SmallVector<SDValue, 4> InVals; + CLI.Chain = LowerCall(CLI, InVals); + + // Verify that the target's LowerCall behaved as expected. + assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && + "LowerCall didn't return a valid chain!"); + assert((!CLI.IsTailCall || InVals.empty()) && + "LowerCall emitted a return value for a tail call!"); + assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && + "LowerCall didn't emit the correct number of values!"); + + // For a tail call, the return value is merely live-out and there aren't + // any nodes in the DAG representing it. Return a special value to + // indicate that a tail call has been emitted and no more Instructions + // should be processed in the current block. 
+ if (CLI.IsTailCall) { + CLI.DAG.setRoot(CLI.Chain); + return std::make_pair(SDValue(), SDValue()); + } + + DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerCall emitted a null value!"); + assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && + "LowerCall emitted a value with the wrong type!"); + }); + + SmallVector<SDValue, 4> ReturnValues; + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + + ComputeValueVTs(*this, DL, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + // An aggregate return value cannot wrap around the address space, so + // offsets to its parts don't wrap either. + SDNodeFlags Flags; + Flags.setNoUnsignedWrap(true); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], CLI.DL, + PtrVT), &Flags); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(CLI.DAG.getMachineFunction(), + DemoteStackIdx, Offsets[i]), + false, false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. 
+ if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } + + SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, + CLI.DAG.getVTList(RetTys), ReturnValues); + return std::make_pair(Res, CLI.Chain); +} + +void TargetLowering::LowerOperationWrapper(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) const { + SDValue Res = LowerOperation(SDValue(N, 0), DAG); + if (Res.getNode()) + Results.push_back(Res); +} + +SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { + llvm_unreachable("LowerOperation not implemented for this target!"); +} + +void +SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { + SDValue Op = getNonRegisterValue(V); + assert((Op.getOpcode() != ISD::CopyFromReg || + cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && + "Copy from a reg to the same reg!"); + assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg, + V->getType()); + SDValue Chain = DAG.getEntryNode(); + + ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == + FuncInfo.PreferredExtendType.end()) + ? ISD::ANY_EXTEND + : FuncInfo.PreferredExtendType[V]; + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); + PendingExports.push_back(Chain); +} + +#include "llvm/CodeGen/SelectionDAGISel.h" + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. This includes arguments used by switches, since +/// the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + if (FastISel) + return A->use_empty(); + + const BasicBlock &Entry = A->getParent()->front(); + for (const User *U : A->users()) + if (cast<Instruction>(U)->getParent() != &Entry || isa<SwitchInst>(U)) + return false; // Use not in entry block. + + return true; +} + +void SelectionDAGISel::LowerArguments(const Function &F) { + SelectionDAG &DAG = SDB->DAG; + SDLoc dl = SDB->getCurSDLoc(); + const DataLayout &DL = DAG.getDataLayout(); + SmallVector<ISD::InputArg, 16> Ins; + + if (!FuncInfo->CanLowerReturn) { + // Put in an sret pointer parameter before all the other parameters. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); + + // NOTE: Assuming that a pointer will never break down to more than one VT + // or one register. + ISD::ArgFlagsTy Flags; + Flags.setSRet(); + MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, + ISD::InputArg::NoArgIndex, 0); + Ins.push_back(RetArg); + } + + // Set up the incoming argument description vector. 
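+ // Illustrative note (editorial, not in the upstream source): the loop below
+ // emits one ISD::InputArg per legal register piece of each IR argument; an
+ // i64 argument on a typical 32-bit target becomes two i32 pieces, the first
+ // flagged Split and the last flagged SplitEnd.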
+ unsigned Idx = 1; + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I, ++Idx) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); + bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; + Type *FinalType = I->getType(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + FinalType = cast<PointerType>(FinalType)->getElementType(); + bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( + FinalType, F.getCallingConv(), F.isVarArg()); + for (unsigned Value = 0, NumValues = ValueVTs.size(); + Value != NumValues; ++Value) { + EVT VT = ValueVTs[Value]; + Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); + ISD::ArgFlagsTy Flags; + unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy); + + if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + Flags.setZExt(); + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + Flags.setSExt(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) + Flags.setInReg(); + if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) + Flags.setSRet(); + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) + Flags.setByVal(); + if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { + Flags.setInAlloca(); + // Set the byval flag for CCAssignFn callbacks that don't know about + // inalloca. This way we can know how many bytes we should've allocated + // and how many bytes a callee cleanup function will pop. If we port + // inalloca to more targets, we'll have to add custom inalloca handling + // in the various CC lowering callbacks. + Flags.setByVal(); + } + if (F.getCallingConv() == CallingConv::X86_INTR) { + // IA Interrupt passes frame (1st parameter) by value in the stack. + if (Idx == 1) + Flags.setByVal(); + } + if (Flags.isByVal() || Flags.isInAlloca()) { + PointerType *Ty = cast<PointerType>(I->getType()); + Type *ElementTy = Ty->getElementType(); + Flags.setByValSize(DL.getTypeAllocSize(ElementTy)); + // For ByVal, alignment should be passed from FE. BE will guess if + // this info is not there but there are cases it cannot get right. + unsigned FrameAlign; + if (F.getParamAlignment(Idx)) + FrameAlign = F.getParamAlignment(Idx); + else + FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL); + Flags.setByValAlign(FrameAlign); + } + if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) + Flags.setNest(); + if (NeedsRegBlock) + Flags.setInConsecutiveRegs(); + Flags.setOrigAlign(OriginalAlignment); + + MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); + if (NumRegs > 1 && i == 0) + MyFlags.Flags.setSplit(); + // if it isn't first piece, alignment must be 1 + else if (i > 0) { + MyFlags.Flags.setOrigAlign(1); + if (i == NumRegs - 1) + MyFlags.Flags.setSplitEnd(); + } + Ins.push_back(MyFlags); + } + if (NeedsRegBlock && Value == NumValues - 1) + Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); + PartBase += VT.getStoreSize(); + } + } + + // Call the target to set up the argument values. + SmallVector<SDValue, 8> InVals; + SDValue NewRoot = TLI->LowerFormalArguments( + DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); + + // Verify that the target's LowerFormalArguments behaved as expected. 
+ assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && + "LowerFormalArguments didn't return a valid chain!"); + assert(InVals.size() == Ins.size() && + "LowerFormalArguments didn't emit the correct number of values!"); + DEBUG({ + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + assert(InVals[i].getNode() && + "LowerFormalArguments emitted a null value!"); + assert(EVT(Ins[i].VT) == InVals[i].getValueType() && + "LowerFormalArguments emitted a value with the wrong type!"); + } + }); + + // Update the DAG with the new chain value resulting from argument lowering. + DAG.setRoot(NewRoot); + + // Set up the argument values. + unsigned i = 0; + Idx = 1; + if (!FuncInfo->CanLowerReturn) { + // Create a virtual register for the sret pointer, and put in a copy + // from the sret argument into it. + SmallVector<EVT, 1> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); + MVT VT = ValueVTs[0].getSimpleVT(); + MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + ISD::NodeType AssertOp = ISD::DELETED_NODE; + SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, + RegVT, VT, nullptr, AssertOp); + + MachineFunction& MF = SDB->DAG.getMachineFunction(); + MachineRegisterInfo& RegInfo = MF.getRegInfo(); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); + FuncInfo->DemoteRegister = SRetReg; + NewRoot = + SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); + DAG.setRoot(NewRoot); + + // i indexes lowered arguments. Bump it past the hidden sret argument. + // Idx indexes LLVM arguments. Don't touch it. + ++i; + } + + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; + ++I, ++Idx) { + SmallVector<SDValue, 4> ArgValues; + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(*TLI, DAG.getDataLayout(), I->getType(), ValueVTs); + unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. + if (I->use_empty() && NumValues) { + SDB->setUnusedArgValue(&*I, InVals[i]); + + // Also remember any frame index for use in FastISel. + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + } + + for (unsigned Val = 0; Val != NumValues; ++Val) { + EVT VT = ValueVTs[Val]; + MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); + + if (!I->use_empty()) { + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) + AssertOp = ISD::AssertSext; + else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) + AssertOp = ISD::AssertZext; + + ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], + NumParts, PartVT, VT, + nullptr, AssertOp)); + } + + i += NumParts; + } + + // We don't need to do anything else for unused arguments. + if (ArgValues.empty()) + continue; + + // Note down frame index. 
+ if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + + SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), + SDB->getCurSDLoc()); + + SDB->setValue(&*I, Res); + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { + if (LoadSDNode *LNode = + dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) + if (FrameIndexSDNode *FI = + dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) + FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex()); + } + + // If this argument is live outside of the entry block, insert a copy from + // wherever we got it to the vreg that other BB's will reference it as. + if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { + // If we can, though, try to skip creating an unnecessary vreg. + // FIXME: This isn't very clean... it would be nice to make this more + // general. It's also subtly incompatible with the hacks FastISel + // uses with vregs. + unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + FuncInfo->ValueMap[&*I] = Reg; + continue; + } + } + if (!isOnlyUsedInEntryBlock(&*I, TM.Options.EnableFastISel)) { + FuncInfo->InitializeRegForValue(&*I); + SDB->CopyToExportRegsIfNeeded(&*I); + } + } + + assert(i == InVals.size() && "Argument register count mismatch!"); + + // Finally, if the target has anything special to do, allow it to do so. + EmitFunctionEntryCode(); +} + +/// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to +/// ensure constants are generated when needed. Remember the virtual registers +/// that need to be added to the Machine PHI nodes as input. We cannot just +/// directly add them, because expansion might result in multiple MBB's for one +/// BB. As such, the start of the BB might correspond to a different MBB than +/// the end. +/// +void +SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { + const TerminatorInst *TI = LLVMBB->getTerminator(); + + SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; + + // Check PHI nodes in successors that expect a value to be available from this + // block. + for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { + const BasicBlock *SuccBB = TI->getSuccessor(succ); + if (!isa<PHINode>(SuccBB->begin())) continue; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; + + // If this terminator has multiple identical successors (common for + // switches), only handle each succ once. + if (!SuccsHandled.insert(SuccMBB).second) + continue; + + MachineBasicBlock::iterator MBBI = SuccMBB->begin(); + + // At this point we know that there is a 1-1 correspondence between LLVM PHI + // nodes and Machine PHI nodes, but the incoming operands have not been + // emitted yet. + for (BasicBlock::const_iterator I = SuccBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) { + // Ignore dead phi's. 
+ if (PN->use_empty()) continue; + + // Skip empty types + if (PN->getType()->isEmptyTy()) + continue; + + unsigned Reg; + const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); + + if (const Constant *C = dyn_cast<Constant>(PHIOp)) { + unsigned &RegOut = ConstantsOut[C]; + if (RegOut == 0) { + RegOut = FuncInfo.CreateRegs(C->getType()); + CopyValueToVirtualRegister(C, RegOut); + } + Reg = RegOut; + } else { + DenseMap<const Value *, unsigned>::iterator I = + FuncInfo.ValueMap.find(PHIOp); + if (I != FuncInfo.ValueMap.end()) + Reg = I->second; + else { + assert(isa<AllocaInst>(PHIOp) && + FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && + "Didn't codegen value into a register!??"); + Reg = FuncInfo.CreateRegs(PHIOp->getType()); + CopyValueToVirtualRegister(PHIOp, Reg); + } + } + + // Remember that this register needs to added to the machine PHI node as + // the input for this MBB. + SmallVector<EVT, 4> ValueVTs; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, DAG.getDataLayout(), PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + for (unsigned i = 0, e = NumRegisters; i != e; ++i) + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + Reg += NumRegisters; + } + } + } + + ConstantsOut.clear(); +} + +/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB +/// is 0. +MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + bool IsLikely, + MachineBasicBlock *SuccMBB) { + // If SuccBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI(ParentMBB); + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB. + ParentMBB->addSuccessor( + SuccMBB, BranchProbabilityInfo::getBranchProbStackProtector(IsLikely)); + return SuccMBB; +} + +MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { + MachineFunction::iterator I(MBB); + if (++I == FuncInfo.MF->end()) + return nullptr; + return &*I; +} + +/// During lowering new call nodes can be created (such as memset, etc.). +/// Those will become new roots of the current DAG, but complications arise +/// when they are tail calls. In such cases, the call lowering will update +/// the root, but the builder still needs to know that a tail call has been +/// lowered in order to avoid generating an additional return. +void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) { + // If the node is null, we do have a tail call. + if (MaybeTC.getNode() != nullptr) + DAG.setRoot(MaybeTC); + else + HasTailCall = true; +} + +bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters, + unsigned *TotalCases, unsigned First, + unsigned Last) { + assert(Last >= First); + assert(TotalCases[Last] >= TotalCases[First]); + + APInt LowCase = Clusters[First].Low->getValue(); + APInt HighCase = Clusters[Last].High->getValue(); + assert(LowCase.getBitWidth() == HighCase.getBitWidth()); + + // FIXME: A range of consecutive cases has 100% density, but only requires one + // comparison to lower. We should discriminate against such consecutive ranges + // in jump tables. 
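+ // Worked example (editorial, not in the upstream source; assumes the default
+ // MinJumpTableDensity of 40): clusters covering {0..9} and {100} give
+ // Range = 101 and NumCases = 11, and 11 * 100 < 101 * 40, so the span is
+ // rejected as too sparse, whereas {0..9} alone (Range = 10, NumCases = 10)
+ // passes easily.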
+ + uint64_t Diff = (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100); + uint64_t Range = Diff + 1; + + uint64_t NumCases = + TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]); + + assert(NumCases < UINT64_MAX / 100); + assert(Range >= NumCases); + + return NumCases * 100 >= Range * MinJumpTableDensity; +} + +static inline bool areJTsAllowed(const TargetLowering &TLI) { + return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); +} + +bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, + CaseCluster &JTCluster) { + assert(First <= Last); + + auto Prob = BranchProbability::getZero(); + unsigned NumCmps = 0; + std::vector<MachineBasicBlock*> Table; + DenseMap<MachineBasicBlock*, BranchProbability> JTProbs; + + // Initialize probabilities in JTProbs. + for (unsigned I = First; I <= Last; ++I) + JTProbs[Clusters[I].MBB] = BranchProbability::getZero(); + + for (unsigned I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Prob += Clusters[I].Prob; + APInt Low = Clusters[I].Low->getValue(); + APInt High = Clusters[I].High->getValue(); + NumCmps += (Low == High) ? 1 : 2; + if (I != First) { + // Fill the gap between this and the previous cluster. + APInt PreviousHigh = Clusters[I - 1].High->getValue(); + assert(PreviousHigh.slt(Low)); + uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1; + for (uint64_t J = 0; J < Gap; J++) + Table.push_back(DefaultMBB); + } + uint64_t ClusterSize = (High - Low).getLimitedValue() + 1; + for (uint64_t J = 0; J < ClusterSize; ++J) + Table.push_back(Clusters[I].MBB); + JTProbs[Clusters[I].MBB] += Clusters[I].Prob; + } + + unsigned NumDests = JTProbs.size(); + if (isSuitableForBitTests(NumDests, NumCmps, + Clusters[First].Low->getValue(), + Clusters[Last].High->getValue())) { + // Clusters[First..Last] should be lowered as bit tests instead. + return false; + } + + // Create the MBB that will load from and jump through the table. + // Note: We create it here, but it's not inserted into the function yet. + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *JumpTableMBB = + CurMF->CreateMachineBasicBlock(SI->getParent()); + + // Add successors. Note: use table order for determinism. + SmallPtrSet<MachineBasicBlock *, 8> Done; + for (MachineBasicBlock *Succ : Table) { + if (Done.count(Succ)) + continue; + addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]); + Done.insert(Succ); + } + JumpTableMBB->normalizeSuccProbs(); + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding()) + ->createJumpTableIndex(Table); + + // Set up the jump table info. + JumpTable JT(-1U, JTI, JumpTableMBB, nullptr); + JumpTableHeader JTH(Clusters[First].Low->getValue(), + Clusters[Last].High->getValue(), SI->getCondition(), + nullptr, false); + JTCases.emplace_back(std::move(JTH), std::move(JT)); + + JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High, + JTCases.size() - 1, Prob); + return true; +} + +void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters, + const SwitchInst *SI, + MachineBasicBlock *DefaultMBB) { +#ifndef NDEBUG + // Clusters must be non-empty, sorted, and only contain Range clusters. 
+ assert(!Clusters.empty()); + for (CaseCluster &C : Clusters) + assert(C.Kind == CC_Range); + for (unsigned i = 1, e = Clusters.size(); i < e; ++i) + assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!areJTsAllowed(TLI)) + return; + + const int64_t N = Clusters.size(); + const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries(); + + // TotalCases[i]: Total nbr of cases in Clusters[0..i]. + SmallVector<unsigned, 8> TotalCases(N); + + for (unsigned i = 0; i < N; ++i) { + APInt Hi = Clusters[i].High->getValue(); + APInt Lo = Clusters[i].Low->getValue(); + TotalCases[i] = (Hi - Lo).getLimitedValue() + 1; + if (i != 0) + TotalCases[i] += TotalCases[i - 1]; + } + + if (N >= MinJumpTableSize && isDense(Clusters, &TotalCases[0], 0, N - 1)) { + // Cheap case: the whole range might be suitable for jump table. + CaseCluster JTCluster; + if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) { + Clusters[0] = JTCluster; + Clusters.resize(1); + return; + } + } + + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // Split Clusters into minimum number of dense partitions. The algorithm uses + // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code + // for the Case Statement'" (1994), but builds the MinPartitions array in + // reverse order to make it easier to reconstruct the partitions in ascending + // order. In the choice between two optimal partitionings, it picks the one + // which yields more jump tables. + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1]. + SmallVector<unsigned, 8> NumTables(N); + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + assert(MinJumpTableSize > 1); + NumTables[N - 1] = 0; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; i--) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. + MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + NumTables[i] = NumTables[i + 1]; + + // Search for a solution that results in fewer partitions. + for (int64_t j = N - 1; j > i; j--) { + // Try building a partition from Clusters[i..j]. + if (isDense(Clusters, &TotalCases[0], i, j)) { + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + bool IsTable = j - i + 1 >= MinJumpTableSize; + unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]); + + // If this j leads to fewer partitions, or same number of partitions + // with more lookup tables, it is a better partitioning. + if (NumPartitions < MinPartitions[i] || + (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) { + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + NumTables[i] = Tables; + } + } + } + } + + // Iterate over the partitions, replacing some with jump tables in-place. 
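+ // Worked example (editorial, not in the upstream source; assumes the default
+ // minimum jump table size of 4): for clusters {0}, {1}, {2}, {3}, {100} the
+ // whole range is too sparse, so the DP above settles on two partitions,
+ // [0..3] (dense enough to become a jump table below) plus {100} on its own.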
+ unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(Last >= First); + assert(DstIndex <= First); + unsigned NumClusters = Last - First + 1; + + CaseCluster JTCluster; + if (NumClusters >= MinJumpTableSize && + buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) { + Clusters[DstIndex++] = JTCluster; + } else { + for (unsigned I = First; I <= Last; ++I) + std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I])); + } + } + Clusters.resize(DstIndex); +} + +bool SelectionDAGBuilder::rangeFitsInWord(const APInt &Low, const APInt &High) { + // FIXME: Using the pointer type doesn't seem ideal. + uint64_t BW = DAG.getDataLayout().getPointerSizeInBits(); + uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1; + return Range <= BW; +} + +bool SelectionDAGBuilder::isSuitableForBitTests(unsigned NumDests, + unsigned NumCmps, + const APInt &Low, + const APInt &High) { + // FIXME: I don't think NumCmps is the correct metric: a single case and a + // range of cases both require only one branch to lower. Just looking at the + // number of clusters and destinations should be enough to decide whether to + // build bit tests. + + // To lower a range with bit tests, the range must fit the bitwidth of a + // machine word. + if (!rangeFitsInWord(Low, High)) + return false; + + // Decide whether it's profitable to lower this range with bit tests. Each + // destination requires a bit test and branch, and there is an overall range + // check branch. For a small number of clusters, separate comparisons might be + // cheaper, and for many destinations, splitting the range might be better. + return (NumDests == 1 && NumCmps >= 3) || + (NumDests == 2 && NumCmps >= 5) || + (NumDests == 3 && NumCmps >= 6); +} + +bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters, + unsigned First, unsigned Last, + const SwitchInst *SI, + CaseCluster &BTCluster) { + assert(First <= Last); + if (First == Last) + return false; + + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + unsigned NumCmps = 0; + for (int64_t I = First; I <= Last; ++I) { + assert(Clusters[I].Kind == CC_Range); + Dests.set(Clusters[I].MBB->getNumber()); + NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2; + } + unsigned NumDests = Dests.count(); + + APInt Low = Clusters[First].Low->getValue(); + APInt High = Clusters[Last].High->getValue(); + assert(Low.slt(High)); + + if (!isSuitableForBitTests(NumDests, NumCmps, Low, High)) + return false; + + APInt LowBound; + APInt CmpRange; + + const int BitWidth = DAG.getTargetLoweringInfo() + .getPointerTy(DAG.getDataLayout()) + .getSizeInBits(); + assert(rangeFitsInWord(Low, High) && "Case range must fit in bit mask!"); + + // Check if the clusters cover a contiguous range such that no value in the + // range will jump to the default statement. + bool ContiguousRange = true; + for (int64_t I = First + 1; I <= Last; ++I) { + if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) { + ContiguousRange = false; + break; + } + } + + if (Low.isStrictlyPositive() && High.slt(BitWidth)) { + // Optimize the case where all the case values fit in a word without having + // to subtract minValue. In this case, we can optimize away the subtraction. 
+ LowBound = APInt::getNullValue(Low.getBitWidth()); + CmpRange = High; + ContiguousRange = false; + } else { + LowBound = Low; + CmpRange = High - Low; + } + + CaseBitsVector CBV; + auto TotalProb = BranchProbability::getZero(); + for (unsigned i = First; i <= Last; ++i) { + // Find the CaseBits for this destination. + unsigned j; + for (j = 0; j < CBV.size(); ++j) + if (CBV[j].BB == Clusters[i].MBB) + break; + if (j == CBV.size()) + CBV.push_back( + CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero())); + CaseBits *CB = &CBV[j]; + + // Update Mask, Bits and ExtraProb. + uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue(); + uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue(); + assert(Hi >= Lo && Hi < 64 && "Invalid bit case!"); + CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo; + CB->Bits += Hi - Lo + 1; + CB->ExtraProb += Clusters[i].Prob; + TotalProb += Clusters[i].Prob; + } + + BitTestInfo BTI; + std::sort(CBV.begin(), CBV.end(), [](const CaseBits &a, const CaseBits &b) { + // Sort by probability first, number of bits second. + if (a.ExtraProb != b.ExtraProb) + return a.ExtraProb > b.ExtraProb; + return a.Bits > b.Bits; + }); + + for (auto &CB : CBV) { + MachineBasicBlock *BitTestBB = + FuncInfo.MF->CreateMachineBasicBlock(SI->getParent()); + BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb)); + } + BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange), + SI->getCondition(), -1U, MVT::Other, false, + ContiguousRange, nullptr, nullptr, std::move(BTI), + TotalProb); + + BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High, + BitTestCases.size() - 1, TotalProb); + return true; +} + +void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters, + const SwitchInst *SI) { +// Partition Clusters into as few subsets as possible, where each subset has a +// range that fits in a machine word and has <= 3 unique destinations. + +#ifndef NDEBUG + // Clusters must be sorted and contain Range or JumpTable clusters. + assert(!Clusters.empty()); + assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable); + for (const CaseCluster &C : Clusters) + assert(C.Kind == CC_Range || C.Kind == CC_JumpTable); + for (unsigned i = 1; i < Clusters.size(); ++i) + assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue())); +#endif + + // The algorithm below is not suitable for -O0. + if (TM.getOptLevel() == CodeGenOpt::None) + return; + + // If target does not have legal shift left, do not emit bit tests at all. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT PTy = TLI.getPointerTy(DAG.getDataLayout()); + if (!TLI.isOperationLegal(ISD::SHL, PTy)) + return; + + int BitWidth = PTy.getSizeInBits(); + const int64_t N = Clusters.size(); + + // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1]. + SmallVector<unsigned, 8> MinPartitions(N); + // LastElement[i] is the last element of the partition starting at i. + SmallVector<unsigned, 8> LastElement(N); + + // FIXME: This might not be the best algorithm for finding bit test clusters. + + // Base case: There is only one way to partition Clusters[N-1]. + MinPartitions[N - 1] = 1; + LastElement[N - 1] = N - 1; + + // Note: loop indexes are signed to avoid underflow. + for (int64_t i = N - 2; i >= 0; --i) { + // Find optimal partitioning of Clusters[i..N-1]. + // Baseline: Put Clusters[i] into a partition on its own. 
+ MinPartitions[i] = MinPartitions[i + 1] + 1; + LastElement[i] = i; + + // Search for a solution that results in fewer partitions. + // Note: the search is limited by BitWidth, reducing time complexity. + for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) { + // Try building a partition from Clusters[i..j]. + + // Check the range. + if (!rangeFitsInWord(Clusters[i].Low->getValue(), + Clusters[j].High->getValue())) + continue; + + // Check nbr of destinations and cluster types. + // FIXME: This works, but doesn't seem very efficient. + bool RangesOnly = true; + BitVector Dests(FuncInfo.MF->getNumBlockIDs()); + for (int64_t k = i; k <= j; k++) { + if (Clusters[k].Kind != CC_Range) { + RangesOnly = false; + break; + } + Dests.set(Clusters[k].MBB->getNumber()); + } + if (!RangesOnly || Dests.count() > 3) + break; + + // Check if it's a better partition. + unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]); + if (NumPartitions < MinPartitions[i]) { + // Found a better partition. + MinPartitions[i] = NumPartitions; + LastElement[i] = j; + } + } + } + + // Iterate over the partitions, replacing with bit-test clusters in-place. + unsigned DstIndex = 0; + for (unsigned First = 0, Last; First < N; First = Last + 1) { + Last = LastElement[First]; + assert(First <= Last); + assert(DstIndex <= First); + + CaseCluster BitTestCluster; + if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) { + Clusters[DstIndex++] = BitTestCluster; + } else { + size_t NumClusters = Last - First + 1; + std::memmove(&Clusters[DstIndex], &Clusters[First], + sizeof(Clusters[0]) * NumClusters); + DstIndex += NumClusters; + } + } + Clusters.resize(DstIndex); +} + +void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *DefaultMBB) { + MachineFunction *CurMF = FuncInfo.MF; + MachineBasicBlock *NextMBB = nullptr; + MachineFunction::iterator BBI(W.MBB); + if (++BBI != FuncInfo.MF->end()) + NextMBB = &*BBI; + + unsigned Size = W.LastCluster - W.FirstCluster + 1; + + BranchProbabilityInfo *BPI = FuncInfo.BPI; + + if (Size == 2 && W.MBB == SwitchMBB) { + // If any two of the cases has the same destination, and if one value + // is the same as the other, but has one bit unset that the other has set, + // use bit manipulation to do two compares at once. For example: + // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" + // TODO: This could be extended to merge any 2 cases in switches with 3 + // cases. + // TODO: Handle cases where W.CaseBB != SwitchBB. + CaseCluster &Small = *W.FirstCluster; + CaseCluster &Big = *W.LastCluster; + + if (Small.Low == Small.High && Big.Low == Big.High && + Small.MBB == Big.MBB) { + const APInt &SmallValue = Small.Low->getValue(); + const APInt &BigValue = Big.Low->getValue(); + + // Check that there is only one bit different. + APInt CommonBit = BigValue ^ SmallValue; + if (CommonBit.isPowerOf2()) { + SDValue CondLHS = getValue(Cond); + EVT VT = CondLHS.getValueType(); + SDLoc DL = getCurSDLoc(); + + SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, + DAG.getConstant(CommonBit, DL, VT)); + SDValue Cond = DAG.getSetCC( + DL, MVT::i1, Or, DAG.getConstant(BigValue | SmallValue, DL, VT), + ISD::SETEQ); + + // Update successor info. + // Both Small and Big will jump to Small.BB, so we sum up the + // probabilities. + addSuccessorWithProb(SwitchMBB, Small.MBB, Small.Prob + Big.Prob); + if (BPI) + addSuccessorWithProb( + SwitchMBB, DefaultMBB, + // The default destination is the first successor in IR. 
+              BPI->getEdgeProbability(SwitchMBB->getBasicBlock(), (unsigned)0));
+        else
+          addSuccessorWithProb(SwitchMBB, DefaultMBB);
+
+        // Insert the true branch.
+        SDValue BrCond =
+            DAG.getNode(ISD::BRCOND, DL, MVT::Other, getControlRoot(), Cond,
+                        DAG.getBasicBlock(Small.MBB));
+        // Insert the false branch.
+        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+                             DAG.getBasicBlock(DefaultMBB));
+
+        DAG.setRoot(BrCond);
+        return;
+      }
+    }
+  }
+
+  if (TM.getOptLevel() != CodeGenOpt::None) {
+    // Order cases by probability so the most likely case will be checked first.
+    std::sort(W.FirstCluster, W.LastCluster + 1,
+              [](const CaseCluster &a, const CaseCluster &b) {
+      return a.Prob > b.Prob;
+    });
+
+    // Rearrange the case blocks so that the last one falls through if possible
+    // without changing the order of probabilities.
+    for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster; ) {
+      --I;
+      if (I->Prob > W.LastCluster->Prob)
+        break;
+      if (I->Kind == CC_Range && I->MBB == NextMBB) {
+        std::swap(*I, *W.LastCluster);
+        break;
+      }
+    }
+  }
+
+  // Compute total probability.
+  BranchProbability DefaultProb = W.DefaultProb;
+  BranchProbability UnhandledProbs = DefaultProb;
+  for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+    UnhandledProbs += I->Prob;
+
+  MachineBasicBlock *CurMBB = W.MBB;
+  for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+    MachineBasicBlock *Fallthrough;
+    if (I == W.LastCluster) {
+      // For the last cluster, fall through to the default destination.
+      Fallthrough = DefaultMBB;
+    } else {
+      Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+      CurMF->insert(BBI, Fallthrough);
+      // Put Cond in a virtual register to make it available from the new blocks.
+      ExportFromCurrentBlock(Cond);
+    }
+    UnhandledProbs -= I->Prob;
+
+    switch (I->Kind) {
+      case CC_JumpTable: {
+        // FIXME: Optimize away range check based on pivot comparisons.
+        JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
+        JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+
+        // The jump block hasn't been inserted yet; insert it here.
+        MachineBasicBlock *JumpMBB = JT->MBB;
+        CurMF->insert(BBI, JumpMBB);
+
+        auto JumpProb = I->Prob;
+        auto FallthroughProb = UnhandledProbs;
+
+        // If the default statement is a target of the jump table, we evenly
+        // distribute the default probability to successors of CurMBB. Also
+        // update the probability on the edge from JumpMBB to Fallthrough.
+        for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+                                              SE = JumpMBB->succ_end();
+             SI != SE; ++SI) {
+          if (*SI == DefaultMBB) {
+            JumpProb += DefaultProb / 2;
+            FallthroughProb -= DefaultProb / 2;
+            JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+            JumpMBB->normalizeSuccProbs();
+            break;
+          }
+        }
+
+        addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+        addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+        CurMBB->normalizeSuccProbs();
+
+        // The jump table header will be inserted in our current block, do the
+        // range check, and fall through to our fallthrough block.
+        JTH->HeaderBB = CurMBB;
+        JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+        // If we're in the right place, emit the jump table header right now.
+        if (CurMBB == SwitchMBB) {
+          visitJumpTableHeader(*JT, *JTH, SwitchMBB);
+          JTH->Emitted = true;
+        }
+        break;
+      }
+      case CC_BitTests: {
+        // FIXME: Optimize away range check based on pivot comparisons.
+        BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+
+        // The bit test blocks haven't been inserted yet; insert them here.
+ for (BitTestCase &BTC : BTB->Cases) + CurMF->insert(BBI, BTC.ThisBB); + + // Fill in fields of the BitTestBlock. + BTB->Parent = CurMBB; + BTB->Default = Fallthrough; + + BTB->DefaultProb = UnhandledProbs; + // If the cases in bit test don't form a contiguous range, we evenly + // distribute the probability on the edge to Fallthrough to two + // successors of CurMBB. + if (!BTB->ContiguousRange) { + BTB->Prob += DefaultProb / 2; + BTB->DefaultProb -= DefaultProb / 2; + } + + // If we're in the right place, emit the bit test header right now. + if (CurMBB == SwitchMBB) { + visitBitTestHeader(*BTB, SwitchMBB); + BTB->Emitted = true; + } + break; + } + case CC_Range: { + const Value *RHS, *LHS, *MHS; + ISD::CondCode CC; + if (I->Low == I->High) { + // Check Cond == I->Low. + CC = ISD::SETEQ; + LHS = Cond; + RHS=I->Low; + MHS = nullptr; + } else { + // Check I->Low <= Cond <= I->High. + CC = ISD::SETLE; + LHS = I->Low; + MHS = Cond; + RHS = I->High; + } + + // The false probability is the sum of all unhandled cases. + CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB, I->Prob, + UnhandledProbs); + + if (CurMBB == SwitchMBB) + visitSwitchCase(CB, SwitchMBB); + else + SwitchCases.push_back(CB); + + break; + } + } + CurMBB = Fallthrough; + } +} + +unsigned SelectionDAGBuilder::caseClusterRank(const CaseCluster &CC, + CaseClusterIt First, + CaseClusterIt Last) { + return std::count_if(First, Last + 1, [&](const CaseCluster &X) { + if (X.Prob != CC.Prob) + return X.Prob > CC.Prob; + + // Ties are broken by comparing the case value. + return X.Low->getValue().slt(CC.Low->getValue()); + }); +} + +void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList, + const SwitchWorkListItem &W, + Value *Cond, + MachineBasicBlock *SwitchMBB) { + assert(W.FirstCluster->Low->getValue().slt(W.LastCluster->Low->getValue()) && + "Clusters not sorted?"); + + assert(W.LastCluster - W.FirstCluster + 1 >= 2 && "Too small to split!"); + + // Balance the tree based on branch probabilities to create a near-optimal (in + // terms of search time given key frequency) binary search tree. See e.g. Kurt + // Mehlhorn "Nearly Optimal Binary Search Trees" (1975). + CaseClusterIt LastLeft = W.FirstCluster; + CaseClusterIt FirstRight = W.LastCluster; + auto LeftProb = LastLeft->Prob + W.DefaultProb / 2; + auto RightProb = FirstRight->Prob + W.DefaultProb / 2; + + // Move LastLeft and FirstRight towards each other from opposite directions to + // find a partitioning of the clusters which balances the probability on both + // sides. If LeftProb and RightProb are equal, alternate which side is + // taken to ensure 0-probability nodes are distributed evenly. + unsigned I = 0; + while (LastLeft + 1 < FirstRight) { + if (LeftProb < RightProb || (LeftProb == RightProb && (I & 1))) + LeftProb += (++LastLeft)->Prob; + else + RightProb += (--FirstRight)->Prob; + I++; + } + + for (;;) { + // Our binary search tree differs from a typical BST in that ours can have up + // to three values in each leaf. The pivot selection above doesn't take that + // into account, which means the tree might require more nodes and be less + // efficient. We compensate for this here. + + unsigned NumLeft = LastLeft - W.FirstCluster + 1; + unsigned NumRight = W.LastCluster - FirstRight + 1; + + if (std::min(NumLeft, NumRight) < 3 && std::max(NumLeft, NumRight) > 3) { + // If one side has less than 3 clusters, and the other has more than 3, + // consider taking a cluster from the other side. 
+
+      if (NumLeft < NumRight) {
+        // Consider moving the first cluster on the right to the left side.
+        CaseCluster &CC = *FirstRight;
+        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+        if (LeftSideRank <= RightSideRank) {
+          // Moving the cluster to the left does not demote it.
+          ++LastLeft;
+          ++FirstRight;
+          continue;
+        }
+      } else {
+        assert(NumRight < NumLeft);
+        // Consider moving the last element on the left to the right side.
+        CaseCluster &CC = *LastLeft;
+        unsigned LeftSideRank = caseClusterRank(CC, W.FirstCluster, LastLeft);
+        unsigned RightSideRank = caseClusterRank(CC, FirstRight, W.LastCluster);
+        if (RightSideRank <= LeftSideRank) {
+          // Moving the cluster to the right does not demote it.
+          --LastLeft;
+          --FirstRight;
+          continue;
+        }
+      }
+    }
+    break;
+  }
+
+  assert(LastLeft + 1 == FirstRight);
+  assert(LastLeft >= W.FirstCluster);
+  assert(FirstRight <= W.LastCluster);
+
+  // Use the first element on the right as pivot since we will make less-than
+  // comparisons against it.
+  CaseClusterIt PivotCluster = FirstRight;
+  assert(PivotCluster > W.FirstCluster);
+  assert(PivotCluster <= W.LastCluster);
+
+  CaseClusterIt FirstLeft = W.FirstCluster;
+  CaseClusterIt LastRight = W.LastCluster;
+
+  const ConstantInt *Pivot = PivotCluster->Low;
+
+  // New blocks will be inserted immediately after the current one.
+  MachineFunction::iterator BBI(W.MBB);
+  ++BBI;
+
+  // We will branch to the LHS if Value < Pivot. If LHS is a single cluster,
+  // we can branch to its destination directly if it's squeezed exactly in
+  // between the known lower bound and Pivot - 1.
+  MachineBasicBlock *LeftMBB;
+  if (FirstLeft == LastLeft && FirstLeft->Kind == CC_Range &&
+      FirstLeft->Low == W.GE &&
+      (FirstLeft->High->getValue() + 1LL) == Pivot->getValue()) {
+    LeftMBB = FirstLeft->MBB;
+  } else {
+    LeftMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+    FuncInfo.MF->insert(BBI, LeftMBB);
+    WorkList.push_back(
+        {LeftMBB, FirstLeft, LastLeft, W.GE, Pivot, W.DefaultProb / 2});
+    // Put Cond in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(Cond);
+  }
+
+  // Similarly, we will branch to the RHS if Value >= Pivot. If RHS is a
+  // single cluster, RHS.Low == Pivot, and we can branch to its destination
+  // directly if RHS.High equals the current upper bound.
+  MachineBasicBlock *RightMBB;
+  if (FirstRight == LastRight && FirstRight->Kind == CC_Range &&
+      W.LT && (FirstRight->High->getValue() + 1ULL) == W.LT->getValue()) {
+    RightMBB = FirstRight->MBB;
+  } else {
+    RightMBB = FuncInfo.MF->CreateMachineBasicBlock(W.MBB->getBasicBlock());
+    FuncInfo.MF->insert(BBI, RightMBB);
+    WorkList.push_back(
+        {RightMBB, FirstRight, LastRight, Pivot, W.LT, W.DefaultProb / 2});
+    // Put Cond in a virtual register to make it available from the new blocks.
+    ExportFromCurrentBlock(Cond);
+  }
+
+  // Create the CaseBlock record that will be used to lower the branch.
+  CaseBlock CB(ISD::SETLT, Cond, Pivot, nullptr, LeftMBB, RightMBB, W.MBB,
+               LeftProb, RightProb);
+
+  if (W.MBB == SwitchMBB)
+    visitSwitchCase(CB, SwitchMBB);
+  else
+    SwitchCases.push_back(CB);
+}
+
+void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
+  // Extract cases from the switch.
+ BranchProbabilityInfo *BPI = FuncInfo.BPI; + CaseClusterVector Clusters; + Clusters.reserve(SI.getNumCases()); + for (auto I : SI.cases()) { + MachineBasicBlock *Succ = FuncInfo.MBBMap[I.getCaseSuccessor()]; + const ConstantInt *CaseVal = I.getCaseValue(); + BranchProbability Prob = + BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex()) + : BranchProbability(1, SI.getNumCases() + 1); + Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob)); + } + + MachineBasicBlock *DefaultMBB = FuncInfo.MBBMap[SI.getDefaultDest()]; + + // Cluster adjacent cases with the same destination. We do this at all + // optimization levels because it's cheap to do and will make codegen faster + // if there are many clusters. + sortAndRangeify(Clusters); + + if (TM.getOptLevel() != CodeGenOpt::None) { + // Replace an unreachable default with the most popular destination. + // FIXME: Exploit unreachable default more aggressively. + bool UnreachableDefault = + isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()); + if (UnreachableDefault && !Clusters.empty()) { + DenseMap<const BasicBlock *, unsigned> Popularity; + unsigned MaxPop = 0; + const BasicBlock *MaxBB = nullptr; + for (auto I : SI.cases()) { + const BasicBlock *BB = I.getCaseSuccessor(); + if (++Popularity[BB] > MaxPop) { + MaxPop = Popularity[BB]; + MaxBB = BB; + } + } + // Set new default. + assert(MaxPop > 0 && MaxBB); + DefaultMBB = FuncInfo.MBBMap[MaxBB]; + + // Remove cases that were pointing to the destination that is now the + // default. + CaseClusterVector New; + New.reserve(Clusters.size()); + for (CaseCluster &CC : Clusters) { + if (CC.MBB != DefaultMBB) + New.push_back(CC); + } + Clusters = std::move(New); + } + } + + // If there is only the default destination, jump there directly. + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; + if (Clusters.empty()) { + SwitchMBB->addSuccessor(DefaultMBB); + if (DefaultMBB != NextBlock(SwitchMBB)) { + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(DefaultMBB))); + } + return; + } + + findJumpTables(Clusters, &SI, DefaultMBB); + findBitTestClusters(Clusters, &SI); + + DEBUG({ + dbgs() << "Case clusters: "; + for (const CaseCluster &C : Clusters) { + if (C.Kind == CC_JumpTable) dbgs() << "JT:"; + if (C.Kind == CC_BitTests) dbgs() << "BT:"; + + C.Low->getValue().print(dbgs(), true); + if (C.Low != C.High) { + dbgs() << '-'; + C.High->getValue().print(dbgs(), true); + } + dbgs() << ' '; + } + dbgs() << '\n'; + }); + + assert(!Clusters.empty()); + SwitchWorkList WorkList; + CaseClusterIt First = Clusters.begin(); + CaseClusterIt Last = Clusters.end() - 1; + auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB); + WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb}); + + while (!WorkList.empty()) { + SwitchWorkListItem W = WorkList.back(); + WorkList.pop_back(); + unsigned NumClusters = W.LastCluster - W.FirstCluster + 1; + + if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) { + // For optimized builds, lower large range as a balanced binary tree. 
+ splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB); + continue; + } + + lowerWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h new file mode 100644 index 0000000..8fb85ff --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -0,0 +1,965 @@ +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating from LLVM IR into SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_SELECTIONDAGBUILDER_H + +#include "StatepointLowering.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" +#include <vector> + +namespace llvm { + +class AddrSpaceCastInst; +class AllocaInst; +class BasicBlock; +class BitCastInst; +class BranchInst; +class CallInst; +class DbgValueInst; +class ExtractElementInst; +class ExtractValueInst; +class FCmpInst; +class FPExtInst; +class FPToSIInst; +class FPToUIInst; +class FPTruncInst; +class Function; +class FunctionLoweringInfo; +class GetElementPtrInst; +class GCFunctionInfo; +class ICmpInst; +class IntToPtrInst; +class IndirectBrInst; +class InvokeInst; +class InsertElementInst; +class InsertValueInst; +class Instruction; +class LoadInst; +class MachineBasicBlock; +class MachineInstr; +class MachineRegisterInfo; +class MDNode; +class MVT; +class PHINode; +class PtrToIntInst; +class ReturnInst; +class SDDbgValue; +class SExtInst; +class SelectInst; +class ShuffleVectorInst; +class SIToFPInst; +class StoreInst; +class SwitchInst; +class DataLayout; +class TargetLibraryInfo; +class TargetLowering; +class TruncInst; +class UIToFPInst; +class UnreachableInst; +class VAArgInst; +class ZExtInst; + +//===----------------------------------------------------------------------===// +/// SelectionDAGBuilder - This is the common target-independent lowering +/// implementation that is parameterized by a TargetLowering object. +/// +class SelectionDAGBuilder { + /// CurInst - The current instruction being visited + const Instruction *CurInst; + + DenseMap<const Value*, SDValue> NodeMap; + + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used + /// to preserve debug information for incoming arguments. + DenseMap<const Value*, SDValue> UnusedArgNodeMap; + + /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap. 
+ class DanglingDebugInfo { + const DbgValueInst* DI; + DebugLoc dl; + unsigned SDNodeOrder; + public: + DanglingDebugInfo() : DI(nullptr), dl(DebugLoc()), SDNodeOrder(0) { } + DanglingDebugInfo(const DbgValueInst *di, DebugLoc DL, unsigned SDNO) : + DI(di), dl(DL), SDNodeOrder(SDNO) { } + const DbgValueInst* getDI() { return DI; } + DebugLoc getdl() { return dl; } + unsigned getSDNodeOrder() { return SDNodeOrder; } + }; + + /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not + /// yet seen the referent. We defer handling these until we do see it. + DenseMap<const Value*, DanglingDebugInfo> DanglingDebugInfoMap; + +public: + /// PendingLoads - Loads are not emitted to the program immediately. We bunch + /// them up and then emit token factor nodes when possible. This allows us to + /// get simple disambiguation between loads without worrying about alias + /// analysis. + SmallVector<SDValue, 8> PendingLoads; + + /// State used while lowering a statepoint sequence (gc_statepoint, + /// gc_relocate, and gc_result). See StatepointLowering.hpp/cpp for details. + StatepointLoweringState StatepointLowering; +private: + + /// PendingExports - CopyToReg nodes that copy values to virtual registers + /// for export to other blocks need to be emitted before any terminator + /// instruction, but they have no other ordering requirements. We bunch them + /// up and the emit a single tokenfactor for them just before terminator + /// instructions. + SmallVector<SDValue, 8> PendingExports; + + /// SDNodeOrder - A unique monotonically increasing number used to order the + /// SDNodes we create. + unsigned SDNodeOrder; + + enum CaseClusterKind { + /// A cluster of adjacent case labels with the same destination, or just one + /// case. + CC_Range, + /// A cluster of cases suitable for jump table lowering. + CC_JumpTable, + /// A cluster of cases suitable for bit test lowering. + CC_BitTests + }; + + /// A cluster of case labels. + struct CaseCluster { + CaseClusterKind Kind; + const ConstantInt *Low, *High; + union { + MachineBasicBlock *MBB; + unsigned JTCasesIndex; + unsigned BTCasesIndex; + }; + BranchProbability Prob; + + static CaseCluster range(const ConstantInt *Low, const ConstantInt *High, + MachineBasicBlock *MBB, BranchProbability Prob) { + CaseCluster C; + C.Kind = CC_Range; + C.Low = Low; + C.High = High; + C.MBB = MBB; + C.Prob = Prob; + return C; + } + + static CaseCluster jumpTable(const ConstantInt *Low, + const ConstantInt *High, unsigned JTCasesIndex, + BranchProbability Prob) { + CaseCluster C; + C.Kind = CC_JumpTable; + C.Low = Low; + C.High = High; + C.JTCasesIndex = JTCasesIndex; + C.Prob = Prob; + return C; + } + + static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High, + unsigned BTCasesIndex, BranchProbability Prob) { + CaseCluster C; + C.Kind = CC_BitTests; + C.Low = Low; + C.High = High; + C.BTCasesIndex = BTCasesIndex; + C.Prob = Prob; + return C; + } + }; + + typedef std::vector<CaseCluster> CaseClusterVector; + typedef CaseClusterVector::iterator CaseClusterIt; + + struct CaseBits { + uint64_t Mask; + MachineBasicBlock* BB; + unsigned Bits; + BranchProbability ExtraProb; + + CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits, + BranchProbability Prob): + Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) { } + + CaseBits() : Mask(0), BB(nullptr), Bits(0) {} + }; + + typedef std::vector<CaseBits> CaseBitsVector; + + /// Sort Clusters and merge adjacent cases. 
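+  /// For example (illustrative only): the cases 1->A, 2->A, 3->A, 7->B would
+  /// be merged into the two clusters [1,3]->A and [7,7]->B.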
+  void sortAndRangeify(CaseClusterVector &Clusters);
+
+  /// CaseBlock - This structure is used to communicate between
+  /// SelectionDAGBuilder and SDISel for the code generation of additional basic
+  /// blocks needed by multi-case switch statements.
+  struct CaseBlock {
+    CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
+              const Value *cmpmiddle, MachineBasicBlock *truebb,
+              MachineBasicBlock *falsebb, MachineBasicBlock *me,
+              BranchProbability trueprob = BranchProbability::getUnknown(),
+              BranchProbability falseprob = BranchProbability::getUnknown())
+      : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
+        TrueBB(truebb), FalseBB(falsebb), ThisBB(me), TrueProb(trueprob),
+        FalseProb(falseprob) {}
+
+    // CC - the condition code to use for the case block's setcc node
+    ISD::CondCode CC;
+
+    // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
+    // Emit by default LHS op RHS. MHS is used for range comparisons:
+    // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
+    const Value *CmpLHS, *CmpMHS, *CmpRHS;
+
+    // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
+    MachineBasicBlock *TrueBB, *FalseBB;
+
+    // ThisBB - the block into which to emit the code for the setcc and branches
+    MachineBasicBlock *ThisBB;
+
+    // TrueProb/FalseProb - branch probabilities.
+    BranchProbability TrueProb, FalseProb;
+  };
+
+  struct JumpTable {
+    JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
+              MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
+
+    /// Reg - the virtual register containing the index of the jump table entry
+    /// to jump to.
+    unsigned Reg;
+    /// JTI - the JumpTableIndex for this jump table in the function.
+    unsigned JTI;
+    /// MBB - the MBB into which to emit the code for the indirect jump.
+    MachineBasicBlock *MBB;
+    /// Default - the MBB of the default bb, which is a successor of the range
+    /// check MBB. This is used when updating PHI nodes in successors.
+    MachineBasicBlock *Default;
+  };
+  struct JumpTableHeader {
+    JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
+                    bool E = false):
+      First(F), Last(L), SValue(SV), HeaderBB(H), Emitted(E) {}
+    APInt First;
+    APInt Last;
+    const Value *SValue;
+    MachineBasicBlock *HeaderBB;
+    bool Emitted;
+  };
+  typedef std::pair<JumpTableHeader, JumpTable> JumpTableBlock;
+
+  struct BitTestCase {
+    BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
+                BranchProbability Prob):
+      Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) { }
+    uint64_t Mask;
+    MachineBasicBlock *ThisBB;
+    MachineBasicBlock *TargetBB;
+    BranchProbability ExtraProb;
+  };
+
+  typedef SmallVector<BitTestCase, 3> BitTestInfo;
+
+  struct BitTestBlock {
+    BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
+                 bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
+                 BitTestInfo C, BranchProbability Pr)
+      : First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
+        ContiguousRange(CR), Parent(P), Default(D), Cases(std::move(C)),
+        Prob(Pr) {}
+    APInt First;
+    APInt Range;
+    const Value *SValue;
+    unsigned Reg;
+    MVT RegVT;
+    bool Emitted;
+    bool ContiguousRange;
+    MachineBasicBlock *Parent;
+    MachineBasicBlock *Default;
+    BitTestInfo Cases;
+    BranchProbability Prob;
+    BranchProbability DefaultProb;
+  };
+
+  /// Minimum jump table density, in percent.
+  enum { MinJumpTableDensity = 40 };
+
+  /// Check whether a range of clusters is dense enough for a jump table.
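+  /// As an illustration, with the MinJumpTableDensity of 40 above, the four
+  /// cases {0, 1, 2, 9} span a range of 10 values (density 40%), which is just
+  /// dense enough to qualify.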
+ bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases, + unsigned First, unsigned Last); + + /// Build a jump table cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildJumpTable(CaseClusterVector &Clusters, unsigned First, + unsigned Last, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster); + + /// Find clusters of cases suitable for jump table lowering. + void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI, + MachineBasicBlock *DefaultMBB); + + /// Check whether the range [Low,High] fits in a machine word. + bool rangeFitsInWord(const APInt &Low, const APInt &High); + + /// Check whether these clusters are suitable for lowering with bit tests based + /// on the number of destinations, comparison metric, and range. + bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, + const APInt &Low, const APInt &High); + + /// Build a bit test cluster from Clusters[First..Last]. Returns false if it + /// decides it's not a good idea. + bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last, + const SwitchInst *SI, CaseCluster &BTCluster); + + /// Find clusters of cases suitable for bit test lowering. + void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI); + + struct SwitchWorkListItem { + MachineBasicBlock *MBB; + CaseClusterIt FirstCluster; + CaseClusterIt LastCluster; + const ConstantInt *GE; + const ConstantInt *LT; + BranchProbability DefaultProb; + }; + typedef SmallVector<SwitchWorkListItem, 4> SwitchWorkList; + + /// Determine the rank by weight of CC in [First,Last]. If CC has more weight + /// than each cluster in the range, its rank is 0. + static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First, + CaseClusterIt Last); + + /// Emit comparison and split W into two subtrees. + void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W, + Value *Cond, MachineBasicBlock *SwitchMBB); + + /// Lower W. + void lowerWorkItem(SwitchWorkListItem W, Value *Cond, + MachineBasicBlock *SwitchMBB, + MachineBasicBlock *DefaultMBB); + + + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). 
+ /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform-specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one can not + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that can not be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform-specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one can not handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. Use the IR level stack protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst or if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. 
Then when a BB with said intrinsic is processed, we codegen the BB
+  ///        normally via SelectBasicBlock. In said process, when we visit the
+  ///        stack protector check, we do not actually emit anything into the
+  ///        BB. Instead, we just initialize the stack protector descriptor
+  ///        class (which involves stashing information/creating the success
+  ///        mbb and the failure mbb if we have not created one for this
+  ///        function yet) and export the guard variable that we are going to
+  ///        compare.
+  ///
+  ///      c. After we finish selecting the basic block, in FinishBasicBlock if
+  ///         the StackProtectorDescriptor attached to the SelectionDAGBuilder is
+  ///         initialized, we first find a splice point in the parent basic block
+  ///         before the terminator and then splice the terminator of said basic
+  ///         block into the success basic block. Then we code-gen a new tail for
+  ///         the parent basic block consisting of the two loads, the comparison,
+  ///         and finally two branches to the success/failure basic blocks. We
+  ///         conclude by code-gening the failure basic block if we have not
+  ///         code-gened it already (all stack protector checks we generate in
+  ///         the same function use the same failure basic block).
+  class StackProtectorDescriptor {
+  public:
+    StackProtectorDescriptor() : ParentMBB(nullptr), SuccessMBB(nullptr),
+                                 FailureMBB(nullptr), Guard(nullptr),
+                                 GuardReg(0) { }
+
+    /// Returns true if all fields of the stack protector descriptor are
+    /// initialized implying that we should/are ready to emit a stack protector.
+    bool shouldEmitStackProtector() const {
+      return ParentMBB && SuccessMBB && FailureMBB && Guard;
+    }
+
+    /// Initialize the stack protector descriptor structure for a new basic
+    /// block.
+    void initialize(const BasicBlock *BB,
+                    MachineBasicBlock *MBB,
+                    const CallInst &StackProtCheckCall) {
+      // Make sure we are not initialized yet.
+      assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
+             "already initialized!");
+      ParentMBB = MBB;
+      SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+      FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+      if (!Guard)
+        Guard = StackProtCheckCall.getArgOperand(0);
+    }
+
+    /// Reset state that changes when we handle different basic blocks.
+    ///
+    /// This currently includes:
+    ///
+    /// 1. The specific basic block we are generating a
+    /// stack protector for (ParentMBB).
+    ///
+    /// 2. The successor machine basic block that will contain the tail of
+    /// parent mbb after we create the stack protector check (SuccessMBB). This
+    /// BB is visited only on stack protector check success.
+    void resetPerBBState() {
+      ParentMBB = nullptr;
+      SuccessMBB = nullptr;
+    }
+
+    /// Reset state that only changes when we switch functions.
+    ///
+    /// This currently includes:
+    ///
+    /// 1. FailureMBB since we reuse the failure code path for all stack
+    /// protector checks created in an individual function.
+    ///
+    /// 2. The guard variable since the guard variable we are checking against is
+    /// always the same.
+ void resetPerFunctionState() { + FailureMBB = nullptr; + Guard = nullptr; + GuardReg = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + unsigned getGuardReg() const { return GuardReg; } + void setGuardReg(unsigned R) { GuardReg = R; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// The virtual register holding the stack guard value. + unsigned GuardReg; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. Assign a large weight if IsLikely is true. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + bool IsLikely, + MachineBasicBlock *SuccMBB = nullptr); + }; + +private: + const TargetMachine &TM; +public: + /// Lowest valid SDNodeOrder. The special case 0 is reserved for scheduling + /// nodes without a corresponding SDNode. + static const unsigned LowestSDNodeOrder = 1; + + SelectionDAG &DAG; + const DataLayout *DL; + AliasAnalysis *AA; + const TargetLibraryInfo *LibInfo; + + /// SwitchCases - Vector of CaseBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<CaseBlock> SwitchCases; + /// JTCases - Vector of JumpTable structures used to communicate + /// SwitchInst code generation information. + std::vector<JumpTableBlock> JTCases; + /// BitTestCases - Vector of BitTestBlock structures used to communicate + /// SwitchInst code generation information. + std::vector<BitTestBlock> BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; + + // Emit PHI-node-operand constants only once even if used by multiple + // PHI nodes. + DenseMap<const Constant *, unsigned> ConstantsOut; + + /// FuncInfo - Information about the function as a whole. + /// + FunctionLoweringInfo &FuncInfo; + + /// GFI - Garbage collection metadata for the function. + GCFunctionInfo *GFI; + + /// LPadToCallSiteMap - Map a landing pad to the call site indexes. + DenseMap<MachineBasicBlock*, SmallVector<unsigned, 4> > LPadToCallSiteMap; + + /// HasTailCall - This is set to true if a call in the current + /// block has been translated as a tail call. In this case, + /// no subsequent DAG nodes should be created. 
+ /// + bool HasTailCall; + + LLVMContext *Context; + + SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) + : CurInst(nullptr), SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), + DAG(dag), FuncInfo(funcinfo), + HasTailCall(false) { + } + + void init(GCFunctionInfo *gfi, AliasAnalysis &aa, + const TargetLibraryInfo *li); + + /// clear - Clear out the current SelectionDAG and the associated + /// state and prepare this SelectionDAGBuilder object to be used + /// for a new block. This doesn't clear out information about + /// additional blocks that are needed to complete switch lowering + /// or PHI node updating; that information is cleared out as it is + /// consumed. + void clear(); + + /// clearDanglingDebugInfo - Clear the dangling debug information + /// map. This function is separated from the clear so that debug + /// information that is dangling in a basic block can be properly + /// resolved in a different basic block. This allows the + /// SelectionDAG to resolve dangling debug information attached + /// to PHI nodes. + void clearDanglingDebugInfo(); + + /// getRoot - Return the current virtual root of the Selection DAG, + /// flushing any PendingLoad items. This must be done before emitting + /// a store or any other node that may need to be ordered after any + /// prior load instructions. + /// + SDValue getRoot(); + + /// getControlRoot - Similar to getRoot, but instead of flushing all the + /// PendingLoad items, flush all the PendingExports items. It is necessary + /// to do this before emitting a terminator instruction. + /// + SDValue getControlRoot(); + + SDLoc getCurSDLoc() const { + return SDLoc(CurInst, SDNodeOrder); + } + + DebugLoc getCurDebugLoc() const { + return CurInst ? CurInst->getDebugLoc() : DebugLoc(); + } + + unsigned getSDNodeOrder() const { return SDNodeOrder; } + + void CopyValueToVirtualRegister(const Value *V, unsigned Reg); + + void visit(const Instruction &I); + + void visit(unsigned Opcode, const User &I); + + /// getCopyFromRegs - If there was virtual register allocated for the value V + /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. + SDValue getCopyFromRegs(const Value *V, Type *Ty); + + // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, + // generate the debug data structures now that we've seen its definition. 
+ void resolveDanglingDebugInfo(const Value *V, SDValue Val); + SDValue getValue(const Value *V); + bool findValue(const Value *V) const; + + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); + + void setValue(const Value *V, SDValue NewN) { + SDValue &N = NodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(!N.getNode() && "Already set a value for this node!"); + N = NewN; + } + + void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + Instruction::BinaryOps Opc, BranchProbability TW, + BranchProbability FW); + void EmitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + MachineBasicBlock *SwitchBB, + BranchProbability TW, BranchProbability FW); + bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases); + bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB); + void CopyToExportRegsIfNeeded(const Value *V); + void ExportFromCurrentBlock(const Value *V); + void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, + const BasicBlock *EHPadBB = nullptr); + + std::pair<SDValue, SDValue> lowerCallOperands( + ImmutableCallSite CS, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + Type *ReturnTy, + const BasicBlock *EHPadBB = nullptr, + bool IsPatchPoint = false); + + /// UpdateSplitBlock - When an MBB was split during scheduling, update the + /// references that need to refer to the last resulting block. + void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); + + // This function is responsible for the whole statepoint lowering process. + // It uniformly handles invoke and call statepoints. + void LowerStatepoint(ImmutableStatepoint Statepoint, + const BasicBlock *EHPadBB = nullptr); +private: + std::pair<SDValue, SDValue> + lowerInvokable(TargetLowering::CallLoweringInfo &CLI, + const BasicBlock *EHPadBB = nullptr); + + // Terminator instructions. 
+ void visitRet(const ReturnInst &I); + void visitBr(const BranchInst &I); + void visitSwitch(const SwitchInst &I); + void visitIndirectBr(const IndirectBrInst &I); + void visitUnreachable(const UnreachableInst &I); + void visitCleanupRet(const CleanupReturnInst &I); + void visitCatchSwitch(const CatchSwitchInst &I); + void visitCatchRet(const CatchReturnInst &I); + void visitCatchPad(const CatchPadInst &I); + void visitCleanupPad(const CleanupPadInst &CPI); + + BranchProbability getEdgeProbability(const MachineBasicBlock *Src, + const MachineBasicBlock *Dst) const; + void addSuccessorWithProb( + MachineBasicBlock *Src, MachineBasicBlock *Dst, + BranchProbability Prob = BranchProbability::getUnknown()); + +public: + void visitSwitchCase(CaseBlock &CB, + MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); + void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); + void visitBitTestCase(BitTestBlock &BB, + MachineBasicBlock* NextMBB, + BranchProbability BranchProbToNext, + unsigned Reg, + BitTestCase &B, + MachineBasicBlock *SwitchBB); + void visitJumpTable(JumpTable &JT); + void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, + MachineBasicBlock *SwitchBB); + +private: + // These all get lowered before this pass. + void visitInvoke(const InvokeInst &I); + void visitResume(const ResumeInst &I); + + void visitBinary(const User &I, unsigned OpCode); + void visitShift(const User &I, unsigned Opcode); + void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } + void visitSub(const User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(const User &I); + void visitMul(const User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } + void visitURem(const User &I) { visitBinary(I, ISD::UREM); } + void visitSRem(const User &I) { visitBinary(I, ISD::SREM); } + void visitFRem(const User &I) { visitBinary(I, ISD::FREM); } + void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); } + void visitSDiv(const User &I); + void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); } + void visitAnd (const User &I) { visitBinary(I, ISD::AND); } + void visitOr (const User &I) { visitBinary(I, ISD::OR); } + void visitXor (const User &I) { visitBinary(I, ISD::XOR); } + void visitShl (const User &I) { visitShift(I, ISD::SHL); } + void visitLShr(const User &I) { visitShift(I, ISD::SRL); } + void visitAShr(const User &I) { visitShift(I, ISD::SRA); } + void visitICmp(const User &I); + void visitFCmp(const User &I); + // Visit the conversion instructions + void visitTrunc(const User &I); + void visitZExt(const User &I); + void visitSExt(const User &I); + void visitFPTrunc(const User &I); + void visitFPExt(const User &I); + void visitFPToUI(const User &I); + void visitFPToSI(const User &I); + void visitUIToFP(const User &I); + void visitSIToFP(const User &I); + void visitPtrToInt(const User &I); + void visitIntToPtr(const User &I); + void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); + + void visitExtractElement(const User &I); + void visitInsertElement(const User &I); + void visitShuffleVector(const User &I); + + void visitExtractValue(const ExtractValueInst &I); + void visitInsertValue(const InsertValueInst &I); + void visitLandingPad(const LandingPadInst &I); + + void visitGetElementPtr(const User &I); + void 
visitSelect(const User &I); + + void visitAlloca(const AllocaInst &I); + void visitLoad(const LoadInst &I); + void visitStore(const StoreInst &I); + void visitMaskedLoad(const CallInst &I); + void visitMaskedStore(const CallInst &I); + void visitMaskedGather(const CallInst &I); + void visitMaskedScatter(const CallInst &I); + void visitAtomicCmpXchg(const AtomicCmpXchgInst &I); + void visitAtomicRMW(const AtomicRMWInst &I); + void visitFence(const FenceInst &I); + void visitPHI(const PHINode &I); + void visitCall(const CallInst &I); + bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); + bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); + bool visitBinaryFloatCall(const CallInst &I, unsigned Opcode); + void visitAtomicLoad(const LoadInst &I); + void visitAtomicStore(const StoreInst &I); + + void visitInlineAsm(ImmutableCallSite CS); + const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); + void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); + + void visitVAStart(const CallInst &I); + void visitVAArg(const VAArgInst &I); + void visitVAEnd(const CallInst &I); + void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(ImmutableCallSite CS, + const BasicBlock *EHPadBB = nullptr); + + // These three are implemented in StatepointLowering.cpp + void visitStatepoint(const CallInst &I); + void visitGCRelocate(const GCRelocateInst &I); + void visitGCResult(const CallInst &I); + + void visitUserOp1(const Instruction &I) { + llvm_unreachable("UserOp1 should not exist at instruction selection time!"); + } + void visitUserOp2(const Instruction &I) { + llvm_unreachable("UserOp2 should not exist at instruction selection time!"); + } + + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); + + /// EmitFuncArgumentDbgValue - If V is an function argument then create + /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// instruction selection, they will be inserted to the entry BB. + bool EmitFuncArgumentDbgValue(const Value *V, DILocalVariable *Variable, + DIExpression *Expr, DILocation *DL, + int64_t Offset, bool IsIndirect, + const SDValue &N); + + /// Return the next block after MBB, or nullptr if there is none. + MachineBasicBlock *NextBlock(MachineBasicBlock *MBB); + + /// Update the DAG and DAG builder with the relevant information after + /// a new root node has been created which could be a tail call. + void updateDAGForMaybeTailCall(SDValue MaybeTC); +}; + +/// RegsForValue - This struct represents the registers (physical or virtual) +/// that a particular set of values is assigned, and the type information about +/// the value. The most common situation is to represent one value at a time, +/// but struct or array values are handled element-wise as multiple values. The +/// splitting of aggregates is performed recursively, so that we never have +/// aggregate-typed registers. The values at this point do not necessarily have +/// legal types, so each value may require one or more registers of some legal +/// type. 
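+/// For instance (an illustrative case, not tied to any particular target): on
+/// a target whose widest legal integer register is 32 bits, a single i64 value
+/// would be described here by two 32-bit registers.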
+/// +struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector<EVT, 4> ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector<MVT, 4> RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. + /// + SmallVector<unsigned, 4> Regs; + + RegsForValue(); + + RegsForValue(const SmallVector<unsigned, 4> ®s, MVT regvt, EVT valuevt); + + RegsForValue(LLVMContext &Context, const TargetLowering &TLI, + const DataLayout &DL, unsigned Reg, Type *Ty); + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + SDLoc dl, + SDValue &Chain, SDValue *Flag, + const Value *V = nullptr) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the specified + /// value into the registers specified by this object. This uses Chain/Flag + /// as the input and updates them for the output Chain/Flag. If the Flag + /// pointer is nullptr, no flag is used. If V is not nullptr, then it is used + /// in printing better diagnostic messages on error. + void + getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, + SDValue *Flag, const Value *V = nullptr, + ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, SDLoc dl, + SelectionDAG &DAG, + std::vector<SDValue> &Ops) const; +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp new file mode 100644 index 0000000..a1c6c4c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -0,0 +1,718 @@ +//===-- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::dump method and friends. 
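+//
+// Usage sketch (added for orientation; the call sites named here are an
+// assumption, only the entry points themselves are defined in this file):
+//
+//   N->dump(&DAG);                 // print one node with target-aware names
+//   DAG.dump();                    // print the whole DAG for the current block
+//   N->printrFull(dbgs(), &DAG);   // recursive print, capped at depth 10
+//
+// These are typically reached from a debugger or from DEBUG() statements.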
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +static cl::opt<bool> +VerboseDAGDumping("dag-dump-verbose", cl::Hidden, + cl::desc("Display more information when dumping selection " + "DAG nodes.")); + +std::string SDNode::getOperationName(const SelectionDAG *G) const { + switch (getOpcode()) { + default: + if (getOpcode() < ISD::BUILTIN_OP_END) + return "<<Unknown DAG Node>>"; + if (isMachineOpcode()) { + if (G) + if (const TargetInstrInfo *TII = G->getSubtarget().getInstrInfo()) + if (getMachineOpcode() < TII->getNumOpcodes()) + return TII->getName(getMachineOpcode()); + return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>"; + } + if (G) { + const TargetLowering &TLI = G->getTargetLoweringInfo(); + const char *Name = TLI.getTargetNodeName(getOpcode()); + if (Name) return Name; + return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>"; + } + return "<<Unknown Node #" + utostr(getOpcode()) + ">>"; + +#ifndef NDEBUG + case ISD::DELETED_NODE: return "<<Deleted Node!>>"; +#endif + case ISD::PREFETCH: return "Prefetch"; + case ISD::ATOMIC_FENCE: return "AtomicFence"; + case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; + case ISD::ATOMIC_SWAP: return "AtomicSwap"; + case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; + case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; + case ISD::ATOMIC_LOAD_AND: return "AtomicLoadAnd"; + case ISD::ATOMIC_LOAD_OR: return "AtomicLoadOr"; + case ISD::ATOMIC_LOAD_XOR: return "AtomicLoadXor"; + case ISD::ATOMIC_LOAD_NAND: return "AtomicLoadNand"; + case ISD::ATOMIC_LOAD_MIN: return "AtomicLoadMin"; + case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax"; + case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin"; + case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax"; + case ISD::ATOMIC_LOAD: return "AtomicLoad"; + case ISD::ATOMIC_STORE: return "AtomicStore"; + case ISD::PCMARKER: return "PCMarker"; + case ISD::READCYCLECOUNTER: return "ReadCycleCounter"; + case ISD::SRCVALUE: return "SrcValue"; + case ISD::MDNODE_SDNODE: return "MDNode"; + case ISD::EntryToken: return "EntryToken"; + case ISD::TokenFactor: return "TokenFactor"; + case ISD::AssertSext: return "AssertSext"; + case ISD::AssertZext: return "AssertZext"; + + case ISD::BasicBlock: return "BasicBlock"; + case ISD::VALUETYPE: return "ValueType"; + case ISD::Register: return "Register"; + case ISD::RegisterMask: return "RegisterMask"; + case ISD::Constant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueConstant"; + return "Constant"; + case ISD::ConstantFP: return "ConstantFP"; + case ISD::GlobalAddress: return "GlobalAddress"; + case ISD::GlobalTLSAddress: return "GlobalTLSAddress"; + case ISD::FrameIndex: return "FrameIndex"; + case ISD::JumpTable: 
return "JumpTable"; + case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE"; + case ISD::RETURNADDR: return "RETURNADDR"; + case ISD::FRAMEADDR: return "FRAMEADDR"; + case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER"; + case ISD::READ_REGISTER: return "READ_REGISTER"; + case ISD::WRITE_REGISTER: return "WRITE_REGISTER"; + case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; + case ISD::EH_RETURN: return "EH_RETURN"; + case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; + case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; + case ISD::EH_SJLJ_SETUP_DISPATCH: return "EH_SJLJ_SETUP_DISPATCH"; + case ISD::ConstantPool: return "ConstantPool"; + case ISD::TargetIndex: return "TargetIndex"; + case ISD::ExternalSymbol: return "ExternalSymbol"; + case ISD::BlockAddress: return "BlockAddress"; + case ISD::INTRINSIC_WO_CHAIN: + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1; + unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue(); + if (IID < Intrinsic::num_intrinsics) + return Intrinsic::getName((Intrinsic::ID)IID); + else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo()) + return TII->getName(IID); + llvm_unreachable("Invalid intrinsic ID"); + } + + case ISD::BUILD_VECTOR: return "BUILD_VECTOR"; + case ISD::TargetConstant: + if (cast<ConstantSDNode>(this)->isOpaque()) + return "OpaqueTargetConstant"; + return "TargetConstant"; + case ISD::TargetConstantFP: return "TargetConstantFP"; + case ISD::TargetGlobalAddress: return "TargetGlobalAddress"; + case ISD::TargetGlobalTLSAddress: return "TargetGlobalTLSAddress"; + case ISD::TargetFrameIndex: return "TargetFrameIndex"; + case ISD::TargetJumpTable: return "TargetJumpTable"; + case ISD::TargetConstantPool: return "TargetConstantPool"; + case ISD::TargetExternalSymbol: return "TargetExternalSymbol"; + case ISD::MCSymbol: return "MCSymbol"; + case ISD::TargetBlockAddress: return "TargetBlockAddress"; + + case ISD::CopyToReg: return "CopyToReg"; + case ISD::CopyFromReg: return "CopyFromReg"; + case ISD::UNDEF: return "undef"; + case ISD::MERGE_VALUES: return "merge_values"; + case ISD::INLINEASM: return "inlineasm"; + case ISD::EH_LABEL: return "eh_label"; + case ISD::HANDLENODE: return "handlenode"; + + // Unary operators + case ISD::FABS: return "fabs"; + case ISD::FMINNUM: return "fminnum"; + case ISD::FMAXNUM: return "fmaxnum"; + case ISD::FMINNAN: return "fminnan"; + case ISD::FMAXNAN: return "fmaxnan"; + case ISD::FNEG: return "fneg"; + case ISD::FSQRT: return "fsqrt"; + case ISD::FSIN: return "fsin"; + case ISD::FCOS: return "fcos"; + case ISD::FSINCOS: return "fsincos"; + case ISD::FTRUNC: return "ftrunc"; + case ISD::FFLOOR: return "ffloor"; + case ISD::FCEIL: return "fceil"; + case ISD::FRINT: return "frint"; + case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; + + // Binary operators + case ISD::ADD: return "add"; + case ISD::SUB: return "sub"; + case ISD::MUL: return "mul"; + case ISD::MULHU: return "mulhu"; + case ISD::MULHS: return "mulhs"; + case ISD::SDIV: return "sdiv"; + case ISD::UDIV: return "udiv"; + case ISD::SREM: return "srem"; + case ISD::UREM: return "urem"; + case ISD::SMUL_LOHI: return "smul_lohi"; + case ISD::UMUL_LOHI: return "umul_lohi"; + case ISD::SDIVREM: return "sdivrem"; + case ISD::UDIVREM: 
return "udivrem"; + case ISD::AND: return "and"; + case ISD::OR: return "or"; + case ISD::XOR: return "xor"; + case ISD::SHL: return "shl"; + case ISD::SRA: return "sra"; + case ISD::SRL: return "srl"; + case ISD::ROTL: return "rotl"; + case ISD::ROTR: return "rotr"; + case ISD::FADD: return "fadd"; + case ISD::FSUB: return "fsub"; + case ISD::FMUL: return "fmul"; + case ISD::FDIV: return "fdiv"; + case ISD::FMA: return "fma"; + case ISD::FMAD: return "fmad"; + case ISD::FREM: return "frem"; + case ISD::FCOPYSIGN: return "fcopysign"; + case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + case ISD::SMIN: return "smin"; + case ISD::SMAX: return "smax"; + case ISD::UMIN: return "umin"; + case ISD::UMAX: return "umax"; + + case ISD::FPOWI: return "fpowi"; + case ISD::SETCC: return "setcc"; + case ISD::SETCCE: return "setcce"; + case ISD::SELECT: return "select"; + case ISD::VSELECT: return "vselect"; + case ISD::SELECT_CC: return "select_cc"; + case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt"; + case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt"; + case ISD::CONCAT_VECTORS: return "concat_vectors"; + case ISD::INSERT_SUBVECTOR: return "insert_subvector"; + case ISD::EXTRACT_SUBVECTOR: return "extract_subvector"; + case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector"; + case ISD::VECTOR_SHUFFLE: return "vector_shuffle"; + case ISD::CARRY_FALSE: return "carry_false"; + case ISD::ADDC: return "addc"; + case ISD::ADDE: return "adde"; + case ISD::SADDO: return "saddo"; + case ISD::UADDO: return "uaddo"; + case ISD::SSUBO: return "ssubo"; + case ISD::USUBO: return "usubo"; + case ISD::SMULO: return "smulo"; + case ISD::UMULO: return "umulo"; + case ISD::SUBC: return "subc"; + case ISD::SUBE: return "sube"; + case ISD::SHL_PARTS: return "shl_parts"; + case ISD::SRA_PARTS: return "sra_parts"; + case ISD::SRL_PARTS: return "srl_parts"; + + // Conversion operators. 
+ case ISD::SIGN_EXTEND: return "sign_extend"; + case ISD::ZERO_EXTEND: return "zero_extend"; + case ISD::ANY_EXTEND: return "any_extend"; + case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; + case ISD::TRUNCATE: return "truncate"; + case ISD::FP_ROUND: return "fp_round"; + case ISD::FLT_ROUNDS_: return "flt_rounds"; + case ISD::FP_ROUND_INREG: return "fp_round_inreg"; + case ISD::FP_EXTEND: return "fp_extend"; + + case ISD::SINT_TO_FP: return "sint_to_fp"; + case ISD::UINT_TO_FP: return "uint_to_fp"; + case ISD::FP_TO_SINT: return "fp_to_sint"; + case ISD::FP_TO_UINT: return "fp_to_uint"; + case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; + case ISD::FP16_TO_FP: return "fp16_to_fp"; + case ISD::FP_TO_FP16: return "fp_to_fp16"; + + case ISD::CONVERT_RNDSAT: { + switch (cast<CvtRndSatSDNode>(this)->getCvtCode()) { + default: llvm_unreachable("Unknown cvt code!"); + case ISD::CVT_FF: return "cvt_ff"; + case ISD::CVT_FS: return "cvt_fs"; + case ISD::CVT_FU: return "cvt_fu"; + case ISD::CVT_SF: return "cvt_sf"; + case ISD::CVT_UF: return "cvt_uf"; + case ISD::CVT_SS: return "cvt_ss"; + case ISD::CVT_SU: return "cvt_su"; + case ISD::CVT_US: return "cvt_us"; + case ISD::CVT_UU: return "cvt_uu"; + } + } + + // Control flow instructions + case ISD::BR: return "br"; + case ISD::BRIND: return "brind"; + case ISD::BR_JT: return "br_jt"; + case ISD::BRCOND: return "brcond"; + case ISD::BR_CC: return "br_cc"; + case ISD::CALLSEQ_START: return "callseq_start"; + case ISD::CALLSEQ_END: return "callseq_end"; + + // EH instructions + case ISD::CATCHRET: return "catchret"; + case ISD::CLEANUPRET: return "cleanupret"; + + // Other operators + case ISD::LOAD: return "load"; + case ISD::STORE: return "store"; + case ISD::MLOAD: return "masked_load"; + case ISD::MSTORE: return "masked_store"; + case ISD::MGATHER: return "masked_gather"; + case ISD::MSCATTER: return "masked_scatter"; + case ISD::VAARG: return "vaarg"; + case ISD::VACOPY: return "vacopy"; + case ISD::VAEND: return "vaend"; + case ISD::VASTART: return "vastart"; + case ISD::DYNAMIC_STACKALLOC: return "dynamic_stackalloc"; + case ISD::EXTRACT_ELEMENT: return "extract_element"; + case ISD::BUILD_PAIR: return "build_pair"; + case ISD::STACKSAVE: return "stacksave"; + case ISD::STACKRESTORE: return "stackrestore"; + case ISD::TRAP: return "trap"; + case ISD::DEBUGTRAP: return "debugtrap"; + case ISD::LIFETIME_START: return "lifetime.start"; + case ISD::LIFETIME_END: return "lifetime.end"; + case ISD::GC_TRANSITION_START: return "gc_transition.start"; + case ISD::GC_TRANSITION_END: return "gc_transition.end"; + case ISD::GET_DYNAMIC_AREA_OFFSET: return "get.dynamic.area.offset"; + + // Bit manipulation + case ISD::BITREVERSE: return "bitreverse"; + case ISD::BSWAP: return "bswap"; + case ISD::CTPOP: return "ctpop"; + case ISD::CTTZ: return "cttz"; + case ISD::CTTZ_ZERO_UNDEF: return "cttz_zero_undef"; + case ISD::CTLZ: return "ctlz"; + case ISD::CTLZ_ZERO_UNDEF: return "ctlz_zero_undef"; + + // Trampolines + case ISD::INIT_TRAMPOLINE: return "init_trampoline"; + case ISD::ADJUST_TRAMPOLINE: return "adjust_trampoline"; + + case ISD::CONDCODE: + switch (cast<CondCodeSDNode>(this)->get()) { + default: llvm_unreachable("Unknown setcc condition!"); + case ISD::SETOEQ: return "setoeq"; + case 
ISD::SETOGT: return "setogt"; + case ISD::SETOGE: return "setoge"; + case ISD::SETOLT: return "setolt"; + case ISD::SETOLE: return "setole"; + case ISD::SETONE: return "setone"; + + case ISD::SETO: return "seto"; + case ISD::SETUO: return "setuo"; + case ISD::SETUEQ: return "setueq"; + case ISD::SETUGT: return "setugt"; + case ISD::SETUGE: return "setuge"; + case ISD::SETULT: return "setult"; + case ISD::SETULE: return "setule"; + case ISD::SETUNE: return "setune"; + + case ISD::SETEQ: return "seteq"; + case ISD::SETGT: return "setgt"; + case ISD::SETGE: return "setge"; + case ISD::SETLT: return "setlt"; + case ISD::SETLE: return "setle"; + case ISD::SETNE: return "setne"; + + case ISD::SETTRUE: return "settrue"; + case ISD::SETTRUE2: return "settrue2"; + case ISD::SETFALSE: return "setfalse"; + case ISD::SETFALSE2: return "setfalse2"; + } + } +} + +const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { + switch (AM) { + default: return ""; + case ISD::PRE_INC: return "<pre-inc>"; + case ISD::PRE_DEC: return "<pre-dec>"; + case ISD::POST_INC: return "<post-inc>"; + case ISD::POST_DEC: return "<post-dec>"; + } +} + +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { +#ifndef NDEBUG + OS << 't' << Node.PersistentId; +#else + OS << (const void*)&Node; +#endif + }); +} + +void SDNode::dump() const { dump(nullptr); } +void SDNode::dump(const SelectionDAG *G) const { + print(dbgs(), G); + dbgs() << '\n'; +} + +void SDNode::print_types(raw_ostream &OS, const SelectionDAG *G) const { + for (unsigned i = 0, e = getNumValues(); i != e; ++i) { + if (i) OS << ","; + if (getValueType(i) == MVT::Other) + OS << "ch"; + else + OS << getValueType(i).getEVTString(); + } +} + +void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { + if (const MachineSDNode *MN = dyn_cast<MachineSDNode>(this)) { + if (!MN->memoperands_empty()) { + OS << "<"; + OS << "Mem:"; + for (MachineSDNode::mmo_iterator i = MN->memoperands_begin(), + e = MN->memoperands_end(); i != e; ++i) { + OS << **i; + if (std::next(i) != e) + OS << " "; + } + OS << ">"; + } + } else if (const ShuffleVectorSDNode *SVN = + dyn_cast<ShuffleVectorSDNode>(this)) { + OS << "<"; + for (unsigned i = 0, e = ValueList[0].getVectorNumElements(); i != e; ++i) { + int Idx = SVN->getMaskElt(i); + if (i) OS << ","; + if (Idx < 0) + OS << "u"; + else + OS << Idx; + } + OS << ">"; + } else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) { + OS << '<' << CSDN->getAPIntValue() << '>'; + } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) { + if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle) + OS << '<' << CSDN->getValueAPF().convertToFloat() << '>'; + else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble) + OS << '<' << CSDN->getValueAPF().convertToDouble() << '>'; + else { + OS << "<APFloat("; + CSDN->getValueAPF().bitcastToAPInt().dump(); + OS << ")>"; + } + } else if (const GlobalAddressSDNode *GADN = + dyn_cast<GlobalAddressSDNode>(this)) { + int64_t offset = GADN->getOffset(); + OS << '<'; + GADN->getGlobal()->printAsOperand(OS); + OS << '>'; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = GADN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) { + OS << "<" << FIDN->getIndex() << ">"; + } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) { + OS << "<" << JTDN->getIndex() << 
">"; + if (unsigned int TF = JTDN->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){ + int offset = CP->getOffset(); + if (CP->isMachineConstantPoolEntry()) + OS << "<" << *CP->getMachineCPVal() << ">"; + else + OS << "<" << *CP->getConstVal() << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = CP->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const TargetIndexSDNode *TI = dyn_cast<TargetIndexSDNode>(this)) { + OS << "<" << TI->getIndex() << '+' << TI->getOffset() << ">"; + if (unsigned TF = TI->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) { + OS << "<"; + const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock(); + if (LBB) + OS << LBB->getName() << " "; + OS << (const void*)BBDN->getBasicBlock() << ">"; + } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) { + OS << ' ' << PrintReg(R->getReg(), + G ? G->getSubtarget().getRegisterInfo() : nullptr); + } else if (const ExternalSymbolSDNode *ES = + dyn_cast<ExternalSymbolSDNode>(this)) { + OS << "'" << ES->getSymbol() << "'"; + if (unsigned int TF = ES->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) { + if (M->getValue()) + OS << "<" << M->getValue() << ">"; + else + OS << "<null>"; + } else if (const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(this)) { + if (MD->getMD()) + OS << "<" << MD->getMD() << ">"; + else + OS << "<null>"; + } else if (const VTSDNode *N = dyn_cast<VTSDNode>(this)) { + OS << ":" << N->getVT().getEVTString(); + } + else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(this)) { + OS << "<" << *LD->getMemOperand(); + + bool doExt = true; + switch (LD->getExtensionType()) { + default: doExt = false; break; + case ISD::EXTLOAD: OS << ", anyext"; break; + case ISD::SEXTLOAD: OS << ", sext"; break; + case ISD::ZEXTLOAD: OS << ", zext"; break; + } + if (doExt) + OS << " from " << LD->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(LD->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(this)) { + OS << "<" << *ST->getMemOperand(); + + if (ST->isTruncatingStore()) + OS << ", trunc to " << ST->getMemoryVT().getEVTString(); + + const char *AM = getIndexedModeName(ST->getAddressingMode()); + if (*AM) + OS << ", " << AM; + + OS << ">"; + } else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) { + OS << "<" << *M->getMemOperand() << ">"; + } else if (const BlockAddressSDNode *BA = + dyn_cast<BlockAddressSDNode>(this)) { + int64_t offset = BA->getOffset(); + OS << "<"; + BA->getBlockAddress()->getFunction()->printAsOperand(OS, false); + OS << ", "; + BA->getBlockAddress()->getBasicBlock()->printAsOperand(OS, false); + OS << ">"; + if (offset > 0) + OS << " + " << offset; + else + OS << " " << offset; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast<AddrSpaceCastSDNode>(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; + } + + if (VerboseDAGDumping) { + if (unsigned Order = getIROrder()) + OS << " [ORD=" << Order << ']'; + + if (getNodeId() != -1) + OS << " [ID=" << getNodeId() << ']'; + + if (!G) + return; + + DILocation *L = getDebugLoc(); + if (!L) + return; + + if (auto 
*Scope = L->getScope()) + OS << Scope->getFilename(); + else + OS << "<unknown>"; + OS << ':' << L->getLine(); + if (unsigned C = L->getColumn()) + OS << ':' << C; + } +} + +/// Return true if this node is so simple that we should just print it inline +/// if it appears as an operand. +static bool shouldPrintInline(const SDNode &Node) { + if (Node.getOpcode() == ISD::EntryToken) + return false; + return Node.getNumOperands() == 0; +} + +static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { + for (const SDValue &Op : N->op_values()) { + if (shouldPrintInline(*Op.getNode())) + continue; + if (Op.getNode()->hasOneUse()) + DumpNodes(Op.getNode(), indent+2, G); + } + + dbgs().indent(indent); + N->dump(G); +} + +void SelectionDAG::dump() const { + dbgs() << "SelectionDAG has " << AllNodes.size() << " nodes:\n"; + + for (allnodes_const_iterator I = allnodes_begin(), E = allnodes_end(); + I != E; ++I) { + const SDNode *N = &*I; + if (!N->hasOneUse() && N != getRoot().getNode() && + (!shouldPrintInline(*N) || N->use_empty())) + DumpNodes(N, 2, this); + } + + if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this); + dbgs() << "\n\n"; +} + +void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { + OS << PrintNodeId(*this) << ": "; + print_types(OS, G); + OS << " = " << getOperationName(G); + print_details(OS, G); +} + +static bool printOperand(raw_ostream &OS, const SelectionDAG *G, + const SDValue Value) { + if (!Value.getNode()) { + OS << "<null>"; + return false; + } else if (shouldPrintInline(*Value.getNode())) { + OS << Value->getOperationName(G) << ':'; + Value->print_types(OS, G); + Value->print_details(OS, G); + return true; + } else { + OS << PrintNodeId(*Value.getNode()); + if (unsigned RN = Value.getResNo()) + OS << ':' << RN; + return false; + } +} + +typedef SmallPtrSet<const SDNode *, 128> VisitedSDNodeSet; +static void DumpNodesr(raw_ostream &OS, const SDNode *N, unsigned indent, + const SelectionDAG *G, VisitedSDNodeSet &once) { + if (!once.insert(N).second) // If we've been here before, return now. + return; + + // Dump the current SDNode, but don't end the line yet. + OS.indent(indent); + N->printr(OS, G); + + // Having printed this SDNode, walk the children: + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + if (i) OS << ","; + OS << " "; + + const SDValue Op = N->getOperand(i); + bool printedInline = printOperand(OS, G, Op); + if (printedInline) + once.insert(Op.getNode()); + } + + OS << "\n"; + + // Dump children that have grandchildren on their own line(s). + for (const SDValue &Op : N->op_values()) + DumpNodesr(OS, Op.getNode(), indent+2, G, once); +} + +void SDNode::dumpr() const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, nullptr, once); +} + +void SDNode::dumpr(const SelectionDAG *G) const { + VisitedSDNodeSet once; + DumpNodesr(dbgs(), this, 0, G, once); +} + +static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, + const SelectionDAG *G, unsigned depth, + unsigned indent) { + if (depth == 0) + return; + + OS.indent(indent); + + N->print(OS, G); + + if (depth < 1) + return; + + for (const SDValue &Op : N->op_values()) { + // Don't follow chain operands. 
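+    // (MVT::Other is the type carried by chain and token values, so skipping
+    // it keeps this recursive dump focused on data operands.)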
+ if (Op.getValueType() == MVT::Other) + continue; + OS << '\n'; + printrWithDepthHelper(OS, Op.getNode(), G, depth-1, indent+2); + } +} + +void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, + unsigned depth) const { + printrWithDepthHelper(OS, this, G, depth, 0); +} + +void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { + // Don't print impossibly deep things. + printrWithDepth(OS, G, 10); +} + +void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { + printrWithDepth(dbgs(), G, depth); +} + +void SDNode::dumprFull(const SelectionDAG *G) const { + // Don't print impossibly deep things. + dumprWithDepth(G, 10); +} + +void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const { + printr(OS, G); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + if (i) OS << ", "; else OS << " "; + printOperand(OS, G, getOperand(i)); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp new file mode 100644 index 0000000..9f8759d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -0,0 +1,3440 @@ +//===-- SelectionDAGISel.cpp - Implement the SelectionDAGISel class -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAGISel class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "ScheduleDAGSDNodes.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <algorithm> +using namespace 
llvm; + +#define DEBUG_TYPE "isel" + +STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on"); +STATISTIC(NumFastIselSuccess, "Number of instructions fast isel selected"); +STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel"); +STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG"); +STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path"); +STATISTIC(NumEntryBlocks, "Number of entry blocks encountered"); +STATISTIC(NumFastIselFailLowerArguments, + "Number of entry blocks where fast isel failed to lower arguments"); + +#ifndef NDEBUG +static cl::opt<bool> +EnableFastISelVerbose2("fast-isel-verbose2", cl::Hidden, + cl::desc("Enable extra verbose messages in the \"fast\" " + "instruction selector")); + + // Terminators +STATISTIC(NumFastIselFailRet,"Fast isel fails on Ret"); +STATISTIC(NumFastIselFailBr,"Fast isel fails on Br"); +STATISTIC(NumFastIselFailSwitch,"Fast isel fails on Switch"); +STATISTIC(NumFastIselFailIndirectBr,"Fast isel fails on IndirectBr"); +STATISTIC(NumFastIselFailInvoke,"Fast isel fails on Invoke"); +STATISTIC(NumFastIselFailResume,"Fast isel fails on Resume"); +STATISTIC(NumFastIselFailUnreachable,"Fast isel fails on Unreachable"); + + // Standard binary operators... +STATISTIC(NumFastIselFailAdd,"Fast isel fails on Add"); +STATISTIC(NumFastIselFailFAdd,"Fast isel fails on FAdd"); +STATISTIC(NumFastIselFailSub,"Fast isel fails on Sub"); +STATISTIC(NumFastIselFailFSub,"Fast isel fails on FSub"); +STATISTIC(NumFastIselFailMul,"Fast isel fails on Mul"); +STATISTIC(NumFastIselFailFMul,"Fast isel fails on FMul"); +STATISTIC(NumFastIselFailUDiv,"Fast isel fails on UDiv"); +STATISTIC(NumFastIselFailSDiv,"Fast isel fails on SDiv"); +STATISTIC(NumFastIselFailFDiv,"Fast isel fails on FDiv"); +STATISTIC(NumFastIselFailURem,"Fast isel fails on URem"); +STATISTIC(NumFastIselFailSRem,"Fast isel fails on SRem"); +STATISTIC(NumFastIselFailFRem,"Fast isel fails on FRem"); + + // Logical operators... +STATISTIC(NumFastIselFailAnd,"Fast isel fails on And"); +STATISTIC(NumFastIselFailOr,"Fast isel fails on Or"); +STATISTIC(NumFastIselFailXor,"Fast isel fails on Xor"); + + // Memory instructions... +STATISTIC(NumFastIselFailAlloca,"Fast isel fails on Alloca"); +STATISTIC(NumFastIselFailLoad,"Fast isel fails on Load"); +STATISTIC(NumFastIselFailStore,"Fast isel fails on Store"); +STATISTIC(NumFastIselFailAtomicCmpXchg,"Fast isel fails on AtomicCmpXchg"); +STATISTIC(NumFastIselFailAtomicRMW,"Fast isel fails on AtomicRMW"); +STATISTIC(NumFastIselFailFence,"Fast isel fails on Fence"); +STATISTIC(NumFastIselFailGetElementPtr,"Fast isel fails on GetElementPtr"); + + // Convert instructions... +STATISTIC(NumFastIselFailTrunc,"Fast isel fails on Trunc"); +STATISTIC(NumFastIselFailZExt,"Fast isel fails on ZExt"); +STATISTIC(NumFastIselFailSExt,"Fast isel fails on SExt"); +STATISTIC(NumFastIselFailFPTrunc,"Fast isel fails on FPTrunc"); +STATISTIC(NumFastIselFailFPExt,"Fast isel fails on FPExt"); +STATISTIC(NumFastIselFailFPToUI,"Fast isel fails on FPToUI"); +STATISTIC(NumFastIselFailFPToSI,"Fast isel fails on FPToSI"); +STATISTIC(NumFastIselFailUIToFP,"Fast isel fails on UIToFP"); +STATISTIC(NumFastIselFailSIToFP,"Fast isel fails on SIToFP"); +STATISTIC(NumFastIselFailIntToPtr,"Fast isel fails on IntToPtr"); +STATISTIC(NumFastIselFailPtrToInt,"Fast isel fails on PtrToInt"); +STATISTIC(NumFastIselFailBitCast,"Fast isel fails on BitCast"); + + // Other instructions...
+STATISTIC(NumFastIselFailICmp,"Fast isel fails on ICmp"); +STATISTIC(NumFastIselFailFCmp,"Fast isel fails on FCmp"); +STATISTIC(NumFastIselFailPHI,"Fast isel fails on PHI"); +STATISTIC(NumFastIselFailSelect,"Fast isel fails on Select"); +STATISTIC(NumFastIselFailCall,"Fast isel fails on Call"); +STATISTIC(NumFastIselFailShl,"Fast isel fails on Shl"); +STATISTIC(NumFastIselFailLShr,"Fast isel fails on LShr"); +STATISTIC(NumFastIselFailAShr,"Fast isel fails on AShr"); +STATISTIC(NumFastIselFailVAArg,"Fast isel fails on VAArg"); +STATISTIC(NumFastIselFailExtractElement,"Fast isel fails on ExtractElement"); +STATISTIC(NumFastIselFailInsertElement,"Fast isel fails on InsertElement"); +STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); +STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); +STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); +STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); + +// Intrinsic instructions... +STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call"); +STATISTIC(NumFastIselFailSAddWithOverflow, + "Fast isel fails on sadd.with.overflow"); +STATISTIC(NumFastIselFailUAddWithOverflow, + "Fast isel fails on uadd.with.overflow"); +STATISTIC(NumFastIselFailSSubWithOverflow, + "Fast isel fails on ssub.with.overflow"); +STATISTIC(NumFastIselFailUSubWithOverflow, + "Fast isel fails on usub.with.overflow"); +STATISTIC(NumFastIselFailSMulWithOverflow, + "Fast isel fails on smul.with.overflow"); +STATISTIC(NumFastIselFailUMulWithOverflow, + "Fast isel fails on umul.with.overflow"); +STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress"); +STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call"); +STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call"); +STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call"); +#endif + +static cl::opt<bool> +EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, + cl::desc("Enable verbose messages in the \"fast\" " + "instruction selector")); +static cl::opt<int> EnableFastISelAbort( + "fast-isel-abort", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction: 0 disable the abort, 1 will " + "abort but for args, calls and terminators, 2 will also " + "abort for argument lowering, and 3 will never fallback " + "to SelectionDAG.")); + +static cl::opt<bool> +UseMBPI("use-mbpi", + cl::desc("use Machine Branch Probability Info"), + cl::init(true), cl::Hidden); + +#ifndef NDEBUG +static cl::opt<std::string> +FilterDAGBasicBlockName("filter-view-dags", cl::Hidden, + cl::desc("Only display the basic block whose name " + "matches this for all view-*-dags options")); +static cl::opt<bool> +ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the first " + "dag combine pass")); +static cl::opt<bool> +ViewLegalizeTypesDAGs("view-legalize-types-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize types")); +static cl::opt<bool> +ViewLegalizeDAGs("view-legalize-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before legalize")); +static cl::opt<bool> +ViewDAGCombine2("view-dag-combine2-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the second " + "dag combine pass")); +static cl::opt<bool> +ViewDAGCombineLT("view-dag-combine-lt-dags", cl::Hidden, + cl::desc("Pop up a window to show dags before the post legalize types" + " dag combine 
pass")); +static cl::opt<bool> +ViewISelDAGs("view-isel-dags", cl::Hidden, + cl::desc("Pop up a window to show isel dags as they are selected")); +static cl::opt<bool> +ViewSchedDAGs("view-sched-dags", cl::Hidden, + cl::desc("Pop up a window to show sched dags as they are processed")); +static cl::opt<bool> +ViewSUnitDAGs("view-sunit-dags", cl::Hidden, + cl::desc("Pop up a window to show SUnit dags after they are processed")); +#else +static const bool ViewDAGCombine1 = false, + ViewLegalizeTypesDAGs = false, ViewLegalizeDAGs = false, + ViewDAGCombine2 = false, + ViewDAGCombineLT = false, + ViewISelDAGs = false, ViewSchedDAGs = false, + ViewSUnitDAGs = false; +#endif + +//===---------------------------------------------------------------------===// +/// +/// RegisterScheduler class - Track the registration of instruction schedulers. +/// +//===---------------------------------------------------------------------===// +MachinePassRegistry RegisterScheduler::Registry; + +//===---------------------------------------------------------------------===// +/// +/// ISHeuristic command line option for instruction schedulers. +/// +//===---------------------------------------------------------------------===// +static cl::opt<RegisterScheduler::FunctionPassCtor, false, + RegisterPassParser<RegisterScheduler> > +ISHeuristic("pre-RA-sched", + cl::init(&createDefaultScheduler), cl::Hidden, + cl::desc("Instruction schedulers available (before register" + " allocation):")); + +static RegisterScheduler +defaultListDAGScheduler("default", "Best scheduler for the target", + createDefaultScheduler); + +namespace llvm { + //===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. + class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) { + IS.TM.setFastISel(IS.TM.getO0WantsFastISel()); + DEBUG(dbgs() << "\tFastISel is " + << (IS.TM.Options.EnableFastISel ? "enabled" : "disabled") + << "\n"); + } + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// + /// createDefaultScheduler - This creates an instruction scheduler appropriate + /// for the target. 
+ ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, + CodeGenOpt::Level OptLevel) { + const TargetLowering *TLI = IS->TLI; + const TargetSubtargetInfo &ST = IS->MF->getSubtarget(); + + // Try first to see if the Target has its own way of selecting a scheduler + if (auto *SchedulerCtor = ST.getDAGScheduler(OptLevel)) { + return SchedulerCtor(IS, OptLevel); + } + + if (OptLevel == CodeGenOpt::None || + (ST.enableMachineScheduler() && ST.enableMachineSchedDefaultSched()) || + TLI->getSchedulingPreference() == Sched::Source) + return createSourceListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::RegPressure) + return createBURRListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::Hybrid) + return createHybridListDAGScheduler(IS, OptLevel); + if (TLI->getSchedulingPreference() == Sched::VLIW) + return createVLIWDAGScheduler(IS, OptLevel); + assert(TLI->getSchedulingPreference() == Sched::ILP && + "Unknown sched type!"); + return createILPListDAGScheduler(IS, OptLevel); + } +} + +// EmitInstrWithCustomInserter - This method should be implemented by targets +// that mark instructions with the 'usesCustomInserter' flag. These +// instructions are special in various ways, which require special support to +// insert. The specified MachineInstr is created but not inserted into any +// basic blocks, and this method is called to expand it into a sequence of +// instructions, potentially also creating new basic blocks and control flow. +// When new basic blocks are inserted and the edges from MBB to its successors +// are modified, the method should insert pairs of <OldSucc, NewSucc> into the +// DenseMap. +MachineBasicBlock * +TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, + MachineBasicBlock *MBB) const { +#ifndef NDEBUG + dbgs() << "If a target marks an instruction with " + "'usesCustomInserter', it must implement " + "TargetLowering::EmitInstrWithCustomInserter!"; +#endif + llvm_unreachable(nullptr); +} + +void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, + SDNode *Node) const { + assert(!MI->hasPostISelHook() && + "If a target marks an instruction with 'hasPostISelHook', " + "it must implement TargetLowering::AdjustInstrPostInstrSelection!"); +} + +//===----------------------------------------------------------------------===// +// SelectionDAGISel code +//===----------------------------------------------------------------------===// + +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, + CodeGenOpt::Level OL) : + MachineFunctionPass(ID), TM(tm), + FuncInfo(new FunctionLoweringInfo()), + CurDAG(new SelectionDAG(tm, OL)), + SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), + GFI(), + OptLevel(OL), + DAGSize(0) { + initializeGCModuleInfoPass(*PassRegistry::getPassRegistry()); + initializeBranchProbabilityInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); + initializeTargetLibraryInfoWrapperPassPass( + *PassRegistry::getPassRegistry()); + } + +SelectionDAGISel::~SelectionDAGISel() { + delete SDB; + delete CurDAG; + delete FuncInfo; +} + +void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<AAResultsWrapperPass>(); + AU.addRequired<GCModuleInfo>(); + AU.addPreserved<GCModuleInfo>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + if (UseMBPI && OptLevel != CodeGenOpt::None) + AU.addRequired<BranchProbabilityInfoWrapperPass>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// 
SplitCriticalSideEffectEdges - Look for critical edges with a PHI value that +/// may trap on it. In this case we have to split the edge so that the path +/// through the predecessor block that doesn't go to the phi block doesn't +/// execute the possibly trapping instruction. +/// +/// This is required for correctness, so it must be done at -O0. +/// +static void SplitCriticalSideEffectEdges(Function &Fn) { + // Loop for blocks with phi nodes. + for (BasicBlock &BB : Fn) { + PHINode *PN = dyn_cast<PHINode>(BB.begin()); + if (!PN) continue; + + ReprocessBlock: + // For each block with a PHI node, check to see if any of the input values + // are potentially trapping constant expressions. Constant expressions are + // the only potentially trapping value that can occur as the argument to a + // PHI. + for (BasicBlock::iterator I = BB.begin(); (PN = dyn_cast<PHINode>(I)); ++I) + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i)); + if (!CE || !CE->canTrap()) continue; + + // The only case we have to worry about is when the edge is critical. + // Since this block has a PHI Node, we assume it has multiple input + // edges: check to see if the pred has multiple successors. + BasicBlock *Pred = PN->getIncomingBlock(i); + if (Pred->getTerminator()->getNumSuccessors() == 1) + continue; + + // Okay, we have to split this edge. + SplitCriticalEdge( + Pred->getTerminator(), GetSuccessorNumber(Pred, &BB), + CriticalEdgeSplittingOptions().setMergeIdenticalEdges()); + goto ReprocessBlock; + } + } +} + +bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { + // Do some sanity-checking on the command-line options. + assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) && + "-fast-isel-verbose requires -fast-isel"); + assert((!EnableFastISelAbort || TM.Options.EnableFastISel) && + "-fast-isel-abort > 0 requires -fast-isel"); + + const Function &Fn = *mf.getFunction(); + MF = &mf; + + // Reset the target options before resetting the optimization + // level below. + // FIXME: This is a horrible hack and should be processed via + // codegen looking at the optimization level explicitly when + // it wants to look at it. + TM.resetTargetOptions(Fn); + // Reset OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + + TII = MF->getSubtarget().getInstrInfo(); + TLI = MF->getSubtarget().getTargetLowering(); + RegInfo = &MF->getRegInfo(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); + GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr; + + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); + + SplitCriticalSideEffectEdges(const_cast<Function &>(Fn)); + + CurDAG->init(*MF); + FuncInfo->set(Fn, *MF, CurDAG); + + if (UseMBPI && OptLevel != CodeGenOpt::None) + FuncInfo->BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); + else + FuncInfo->BPI = nullptr; + + SDB->init(GFI, *AA, LibInfo); + + MF->setHasInlineAsm(false); + + FuncInfo->SplitCSR = false; + SmallVector<MachineBasicBlock*, 4> Returns; + + // We split CSR if the target supports it for the given function + // and the function has only return exits. + if (TLI->supportSplitCSR(MF)) { + FuncInfo->SplitCSR = true; + + // Collect all the return blocks. 
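+    // (Illustrative note, not from the original source: a successor-less
+    // block ending in 'ret' keeps SplitCSR enabled and is recorded below, one
+    // ending in 'unreachable' is skipped, and any other exit -- for example a
+    // block terminated by 'resume' -- hits the bail-out and disables SplitCSR
+    // for the whole function.)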
+ for (const BasicBlock &BB : Fn) { + if (!succ_empty(&BB)) + continue; + + const TerminatorInst *Term = BB.getTerminator(); + if (isa<UnreachableInst>(Term)) + continue; + if (isa<ReturnInst>(Term)) { + Returns.push_back(FuncInfo->MBBMap[&BB]); + continue; + } + + // Bail out if the exit block is not Return nor Unreachable. + FuncInfo->SplitCSR = false; + break; + } + } + + MachineBasicBlock *EntryMBB = &MF->front(); + if (FuncInfo->SplitCSR) + // This performs initialization so lowering for SplitCSR will be correct. + TLI->initializeSplitCSR(EntryMBB); + + SelectAllBasicBlocks(Fn); + + // If the first basic block in the function has live ins that need to be + // copied into vregs, emit the copies into the top of the block before + // emitting the code for the block. + const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo(); + RegInfo->EmitLiveInCopies(EntryMBB, TRI, *TII); + + // Insert copies in the entry block and the return blocks. + if (FuncInfo->SplitCSR) + TLI->insertCopiesSplitCSR(EntryMBB, Returns); + + DenseMap<unsigned, unsigned> LiveInMap; + if (!FuncInfo->ArgDbgValues.empty()) + for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(), + E = RegInfo->livein_end(); LI != E; ++LI) + if (LI->second) + LiveInMap.insert(std::make_pair(LI->first, LI->second)); + + // Insert DBG_VALUE instructions for function arguments to the entry block. + for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { + MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = + hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + EntryMBB->insert(EntryMBB->begin(), MI); + else { + MachineInstr *Def = RegInfo->getVRegDef(Reg); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(std::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); + } + + // If Reg is live-in then update debug info to track its copy in a vreg. + DenseMap<unsigned, unsigned>::iterator LDI = LiveInMap.find(Reg); + if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); + MachineInstr *Def = RegInfo->getVRegDef(LDI->second); + MachineBasicBlock::iterator InsertPos = Def; + const MDNode *Variable = MI->getDebugVariable(); + const MDNode *Expr = MI->getDebugExpression(); + DebugLoc DL = MI->getDebugLoc(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; + assert(cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(DL) && + "Expected inlined-at fields to agree"); + // Def is never a terminator here, so it is ok to increment InsertPos. + BuildMI(*EntryMBB, ++InsertPos, DL, TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, LDI->second, Offset, Variable, Expr); + + // If this vreg is directly copied into an exported register then + // that COPY instructions also need DBG_VALUE, if it is the only + // user of LDI->second. 
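+      // (Hypothetical example for the scan below: if the vreg holding the
+      // live-in copy of argument 'x' has exactly one non-debug use, and that
+      // use is a COPY in the entry block, a second DBG_VALUE for 'x' is
+      // attached to the COPY's destination register; any other or additional
+      // use cancels this.)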
+ MachineInstr *CopyUseMI = nullptr; + for (MachineRegisterInfo::use_instr_iterator + UI = RegInfo->use_instr_begin(LDI->second), + E = RegInfo->use_instr_end(); UI != E; ) { + MachineInstr *UseMI = &*(UI++); + if (UseMI->isDebugValue()) continue; + if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) { + CopyUseMI = UseMI; continue; + } + // Otherwise this is another use or second copy use. + CopyUseMI = nullptr; break; + } + if (CopyUseMI) { + // Use MI's debug location, which describes where Variable was + // declared, rather than whatever is attached to CopyUseMI. + MachineInstr *NewMI = + BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect, + CopyUseMI->getOperand(0).getReg(), Offset, Variable, Expr); + MachineBasicBlock::iterator Pos = CopyUseMI; + EntryMBB->insertAfter(Pos, NewMI); + } + } + } + + // Determine if there are any calls in this machine function. + MachineFrameInfo *MFI = MF->getFrameInfo(); + for (const auto &MBB : *MF) { + if (MFI->hasCalls() && MF->hasInlineAsm()) + break; + + for (const auto &MI : MBB) { + const MCInstrDesc &MCID = TII->get(MI.getOpcode()); + if ((MCID.isCall() && !MCID.isReturn()) || + MI.isStackAligningInlineAsm()) { + MFI->setHasCalls(true); + } + if (MI.isInlineAsm()) { + MF->setHasInlineAsm(true); + } + } + } + + // Determine if there is a call to setjmp in the machine function. + MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); + + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap<unsigned, unsigned>::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); + // Replace it. + + + // Replacing one register with another won't touch the kill flags. + // We need to conservatively clear the kill flags as a kill on the old + // register might dominate existing uses of the new register. + if (!MRI.use_empty(To)) + MRI.clearKillFlags(From); + MRI.replaceRegWith(From, To); + } + + if (TLI->hasCopyImplyingStackAdjustment(MF)) + MFI->setHasOpaqueSPAdjustment(true); + + // Freeze the set of reserved registers now that MachineFrameInfo has been + // set up. All the information required by getReservedRegs() should be + // available now. + MRI.freezeReservedRegs(*MF); + + // Release function-specific state. SDB and CurDAG are already cleared + // at this point. + FuncInfo->clear(); + + DEBUG(dbgs() << "*** MachineFunction at end of ISel ***\n"); + DEBUG(MF->print(dbgs())); + + return true; +} + +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { + // Lower the instructions. If a call is emitted as a tail call, cease emitting + // nodes for this block. + for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) + SDB->visit(*I); + + // Make sure the root of the DAG is up-to-date. 
+ CurDAG->setRoot(SDB->getControlRoot()); + HadTailCall = SDB->HasTailCall; + SDB->clear(); + + // Final step, emit the lowered DAG as machine code. + CodeGenAndEmitDAG(); +} + +void SelectionDAGISel::ComputeLiveOutVRegInfo() { + SmallPtrSet<SDNode*, 128> VisitedNodes; + SmallVector<SDNode*, 128> Worklist; + + Worklist.push_back(CurDAG->getRoot().getNode()); + + APInt KnownZero; + APInt KnownOne; + + do { + SDNode *N = Worklist.pop_back_val(); + + // If we've already seen this node, ignore it. + if (!VisitedNodes.insert(N).second) + continue; + + // Otherwise, add all chain operands to the worklist. + for (const SDValue &Op : N->op_values()) + if (Op.getValueType() == MVT::Other) + Worklist.push_back(Op.getNode()); + + // If this is a CopyToReg with a vreg dest, process it. + if (N->getOpcode() != ISD::CopyToReg) + continue; + + unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + continue; + + // Ignore non-scalar or non-integer values. + SDValue Src = N->getOperand(2); + EVT SrcVT = Src.getValueType(); + if (!SrcVT.isInteger() || SrcVT.isVector()) + continue; + + unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); + CurDAG->computeKnownBits(Src, KnownZero, KnownOne); + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); + } while (!Worklist.empty()); +} + +void SelectionDAGISel::CodeGenAndEmitDAG() { + std::string GroupName; + if (TimePassesIsEnabled) + GroupName = "Instruction Selection and Scheduling"; + std::string BlockName; + int BlockNumber = -1; + (void)BlockNumber; + bool MatchFilterBB = false; (void)MatchFilterBB; +#ifndef NDEBUG + MatchFilterBB = (FilterDAGBasicBlockName.empty() || + FilterDAGBasicBlockName == + FuncInfo->MBB->getBasicBlock()->getName().str()); +#endif +#ifdef NDEBUG + if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || + ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || + ViewSUnitDAGs) +#endif + { + BlockNumber = FuncInfo->MBB->getNumber(); + BlockName = + (MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str(); + } + DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewDAGCombine1 && MatchFilterBB) + CurDAG->viewGraph("dag-combine1 input for " + BlockName); + + // Run the DAG combiner in pre-legalize mode. + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + // Second step, hack on the DAG until it only uses operations and types that + // the target supports. + if (ViewLegalizeTypesDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize-types input for " + BlockName); + + bool Changed; + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeTypes(); + } + + DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + CurDAG->NewNodesMustHaveLegalTypes = true; + + if (Changed) { + if (ViewDAGCombineLT && MatchFilterBB) + CurDAG->viewGraph("dag-combine-lt input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. 
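+    // (Debugging sketch, not from the original source: in a +Asserts build
+    // the -view-dag-combine-lt-dags flag checked above, together with
+    // -filter-view-dags=<block name>, pops up the graph for just that block;
+    // the exact llc command line is an assumption, the flags are the cl::opts
+    // declared near the top of this file.)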
+ { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + } + + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); + Changed = CurDAG->LegalizeVectors(); + } + + if (Changed) { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); + CurDAG->LegalizeTypes(); + } + + if (ViewDAGCombineLT && MatchFilterBB) + CurDAG->viewGraph("dag-combine-lv input for " + BlockName); + + // Run the DAG combiner in post-type-legalize mode. + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" + << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + } + + if (ViewLegalizeDAGs && MatchFilterBB) + CurDAG->viewGraph("legalize input for " + BlockName); + + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); + CurDAG->Legalize(); + } + + DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewDAGCombine2 && MatchFilterBB) + CurDAG->viewGraph("dag-combine2 input for " + BlockName); + + // Run the DAG combiner in post-legalize mode. + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); + } + + DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (OptLevel != CodeGenOpt::None) + ComputeLiveOutVRegInfo(); + + if (ViewISelDAGs && MatchFilterBB) + CurDAG->viewGraph("isel input for " + BlockName); + + // Third, instruction select all of the operations to machine code, adding the + // code to the MachineBasicBlock. + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); + DoInstructionSelection(); + } + + DEBUG(dbgs() << "Selected selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); + + if (ViewSchedDAGs && MatchFilterBB) + CurDAG->viewGraph("scheduler input for " + BlockName); + + // Schedule machine code. + ScheduleDAGSDNodes *Scheduler = CreateScheduler(); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB); + } + + if (ViewSUnitDAGs && MatchFilterBB) Scheduler->viewGraph(); + + // Emit machine code to BB. This can change 'BB' to the last block being + // inserted into. + MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB; + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + // FuncInfo->InsertPt is passed by reference and set to the end of the + // scheduled instructions. + LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule(FuncInfo->InsertPt); + } + + // If the block was split, make sure we update any references that are used to + // update PHI nodes later on. + if (FirstMBB != LastMBB) + SDB->UpdateSplitBlock(FirstMBB, LastMBB); + + // Free the scheduler state. + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); + delete Scheduler; + } + + // Free the SelectionDAG state, now that we're finished with it. 
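+  // clear() releases every node so the same SelectionDAG object can be reused
+  // for the next basic block, starting again from just the entry token.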
+ CurDAG->clear(); +} + +namespace { +/// ISelUpdater - helper class to handle updates of the instruction selection +/// graph. +class ISelUpdater : public SelectionDAG::DAGUpdateListener { + SelectionDAG::allnodes_iterator &ISelPosition; +public: + ISelUpdater(SelectionDAG &DAG, SelectionDAG::allnodes_iterator &isp) + : SelectionDAG::DAGUpdateListener(DAG), ISelPosition(isp) {} + + /// NodeDeleted - Handle nodes deleted from the graph. If the node being + /// deleted is the current ISelPosition node, update ISelPosition. + /// + void NodeDeleted(SDNode *N, SDNode *E) override { + if (ISelPosition == SelectionDAG::allnodes_iterator(N)) + ++ISelPosition; + } +}; +} // end anonymous namespace + +void SelectionDAGISel::DoInstructionSelection() { + DEBUG(dbgs() << "===== Instruction selection begins: BB#" + << FuncInfo->MBB->getNumber() + << " '" << FuncInfo->MBB->getName() << "'\n"); + + PreprocessISelDAG(); + + // Select target instructions for the DAG. + { + // Number all nodes with a topological order and set DAGSize. + DAGSize = CurDAG->AssignTopologicalOrder(); + + // Create a dummy node (which is not added to allnodes), that adds + // a reference to the root node, preventing it from being deleted, + // and tracking any changes of the root. + HandleSDNode Dummy(CurDAG->getRoot()); + SelectionDAG::allnodes_iterator ISelPosition (CurDAG->getRoot().getNode()); + ++ISelPosition; + + // Make sure that ISelPosition gets properly updated when nodes are deleted + // in calls made from this function. + ISelUpdater ISU(*CurDAG, ISelPosition); + + // The AllNodes list is now topological-sorted. Visit the + // nodes by starting at the end of the list (the root of the + // graph) and preceding back toward the beginning (the entry + // node). + while (ISelPosition != CurDAG->allnodes_begin()) { + SDNode *Node = &*--ISelPosition; + // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes, + // but there are currently some corner cases that it misses. Also, this + // makes it theoretically possible to disable the DAGCombiner. + if (Node->use_empty()) + continue; + + SDNode *ResNode = Select(Node); + + // FIXME: This is pretty gross. 'Select' should be changed to not return + // anything at all and this code should be nuked with a tactical strike. + + // If node should not be replaced, continue with the next one. + if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) + continue; + // Replace node. + if (ResNode) { + ReplaceUses(Node, ResNode); + } + + // If after the replacement this node is not used any more, + // remove this dead node. + if (Node->use_empty()) // Don't delete EntryToken, etc. + CurDAG->RemoveDeadNode(Node); + } + + CurDAG->setRoot(Dummy.getValue()); + } + + DEBUG(dbgs() << "===== Instruction selection ends:\n"); + + PostprocessISelDAG(); +} + +static bool hasExceptionPointerOrCodeUser(const CatchPadInst *CPI) { + for (const User *U : CPI->users()) { + if (const IntrinsicInst *EHPtrCall = dyn_cast<IntrinsicInst>(U)) { + Intrinsic::ID IID = EHPtrCall->getIntrinsicID(); + if (IID == Intrinsic::eh_exceptionpointer || + IID == Intrinsic::eh_exceptioncode) + return true; + } + } + return false; +} + +/// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and +/// do other setup for EH landing-pad blocks. 
+bool SelectionDAGISel::PrepareEHLandingPad() { + MachineBasicBlock *MBB = FuncInfo->MBB; + const Constant *PersonalityFn = FuncInfo->Fn->getPersonalityFn(); + const BasicBlock *LLVMBB = MBB->getBasicBlock(); + const TargetRegisterClass *PtrRC = + TLI->getRegClassFor(TLI->getPointerTy(CurDAG->getDataLayout())); + + // Catchpads have one live-in register, which typically holds the exception + // pointer or code. + if (const auto *CPI = dyn_cast<CatchPadInst>(LLVMBB->getFirstNonPHI())) { + if (hasExceptionPointerOrCodeUser(CPI)) { + // Get or create the virtual register to hold the pointer or code. Mark + // the live in physreg and copy into the vreg. + MCPhysReg EHPhysReg = TLI->getExceptionPointerRegister(PersonalityFn); + assert(EHPhysReg && "target lacks exception pointer register"); + MBB->addLiveIn(EHPhysReg); + unsigned VReg = FuncInfo->getCatchPadExceptionPointerVReg(CPI, PtrRC); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), + TII->get(TargetOpcode::COPY), VReg) + .addReg(EHPhysReg, RegState::Kill); + } + return true; + } + + if (!LLVMBB->isLandingPad()) + return true; + + // Add a label to mark the beginning of the landing pad. Deletion of the + // landing pad can thus be detected via the MachineModuleInfo. + MCSymbol *Label = MF->getMMI().addLandingPad(MBB); + + // Assign the call site to the landing pad's begin label. + MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]); + + const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL); + BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); + + // Mark exception register as live in. + if (unsigned Reg = TLI->getExceptionPointerRegister(PersonalityFn)) + FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); + + // Mark exception selector register as live in. + if (unsigned Reg = TLI->getExceptionSelectorRegister(PersonalityFn)) + FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); + + return true; +} + +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa<TerminatorInst>(I) && // Terminators aren't folded. + !isa<DbgInfoIntrinsic>(I) && // Debug instructions aren't folded. + !I->isEHPad() && // EH pad instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + +#ifndef NDEBUG +// Collect per Instruction statistics for fast-isel misses. Only those +// instructions that cause the bail are accounted for. It does not account for +// instructions higher in the block. Thus, summing the per instructions stats +// will not add up to what is reported by NumFastIselFailures. +static void collectFailStats(const Instruction *I) { + switch (I->getOpcode()) { + default: assert (0 && "<Invalid operator> "); + + // Terminators + case Instruction::Ret: NumFastIselFailRet++; return; + case Instruction::Br: NumFastIselFailBr++; return; + case Instruction::Switch: NumFastIselFailSwitch++; return; + case Instruction::IndirectBr: NumFastIselFailIndirectBr++; return; + case Instruction::Invoke: NumFastIselFailInvoke++; return; + case Instruction::Resume: NumFastIselFailResume++; return; + case Instruction::Unreachable: NumFastIselFailUnreachable++; return; + + // Standard binary operators... 
+ case Instruction::Add: NumFastIselFailAdd++; return; + case Instruction::FAdd: NumFastIselFailFAdd++; return; + case Instruction::Sub: NumFastIselFailSub++; return; + case Instruction::FSub: NumFastIselFailFSub++; return; + case Instruction::Mul: NumFastIselFailMul++; return; + case Instruction::FMul: NumFastIselFailFMul++; return; + case Instruction::UDiv: NumFastIselFailUDiv++; return; + case Instruction::SDiv: NumFastIselFailSDiv++; return; + case Instruction::FDiv: NumFastIselFailFDiv++; return; + case Instruction::URem: NumFastIselFailURem++; return; + case Instruction::SRem: NumFastIselFailSRem++; return; + case Instruction::FRem: NumFastIselFailFRem++; return; + + // Logical operators... + case Instruction::And: NumFastIselFailAnd++; return; + case Instruction::Or: NumFastIselFailOr++; return; + case Instruction::Xor: NumFastIselFailXor++; return; + + // Memory instructions... + case Instruction::Alloca: NumFastIselFailAlloca++; return; + case Instruction::Load: NumFastIselFailLoad++; return; + case Instruction::Store: NumFastIselFailStore++; return; + case Instruction::AtomicCmpXchg: NumFastIselFailAtomicCmpXchg++; return; + case Instruction::AtomicRMW: NumFastIselFailAtomicRMW++; return; + case Instruction::Fence: NumFastIselFailFence++; return; + case Instruction::GetElementPtr: NumFastIselFailGetElementPtr++; return; + + // Convert instructions... + case Instruction::Trunc: NumFastIselFailTrunc++; return; + case Instruction::ZExt: NumFastIselFailZExt++; return; + case Instruction::SExt: NumFastIselFailSExt++; return; + case Instruction::FPTrunc: NumFastIselFailFPTrunc++; return; + case Instruction::FPExt: NumFastIselFailFPExt++; return; + case Instruction::FPToUI: NumFastIselFailFPToUI++; return; + case Instruction::FPToSI: NumFastIselFailFPToSI++; return; + case Instruction::UIToFP: NumFastIselFailUIToFP++; return; + case Instruction::SIToFP: NumFastIselFailSIToFP++; return; + case Instruction::IntToPtr: NumFastIselFailIntToPtr++; return; + case Instruction::PtrToInt: NumFastIselFailPtrToInt++; return; + case Instruction::BitCast: NumFastIselFailBitCast++; return; + + // Other instructions... 
+ case Instruction::ICmp: NumFastIselFailICmp++; return; + case Instruction::FCmp: NumFastIselFailFCmp++; return; + case Instruction::PHI: NumFastIselFailPHI++; return; + case Instruction::Select: NumFastIselFailSelect++; return; + case Instruction::Call: { + if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch (Intrinsic->getIntrinsicID()) { + default: + NumFastIselFailIntrinsicCall++; return; + case Intrinsic::sadd_with_overflow: + NumFastIselFailSAddWithOverflow++; return; + case Intrinsic::uadd_with_overflow: + NumFastIselFailUAddWithOverflow++; return; + case Intrinsic::ssub_with_overflow: + NumFastIselFailSSubWithOverflow++; return; + case Intrinsic::usub_with_overflow: + NumFastIselFailUSubWithOverflow++; return; + case Intrinsic::smul_with_overflow: + NumFastIselFailSMulWithOverflow++; return; + case Intrinsic::umul_with_overflow: + NumFastIselFailUMulWithOverflow++; return; + case Intrinsic::frameaddress: + NumFastIselFailFrameaddress++; return; + case Intrinsic::sqrt: + NumFastIselFailSqrt++; return; + case Intrinsic::experimental_stackmap: + NumFastIselFailStackMap++; return; + case Intrinsic::experimental_patchpoint_void: // fall-through + case Intrinsic::experimental_patchpoint_i64: + NumFastIselFailPatchPoint++; return; + } + } + NumFastIselFailCall++; + return; + } + case Instruction::Shl: NumFastIselFailShl++; return; + case Instruction::LShr: NumFastIselFailLShr++; return; + case Instruction::AShr: NumFastIselFailAShr++; return; + case Instruction::VAArg: NumFastIselFailVAArg++; return; + case Instruction::ExtractElement: NumFastIselFailExtractElement++; return; + case Instruction::InsertElement: NumFastIselFailInsertElement++; return; + case Instruction::ShuffleVector: NumFastIselFailShuffleVector++; return; + case Instruction::ExtractValue: NumFastIselFailExtractValue++; return; + case Instruction::InsertValue: NumFastIselFailInsertValue++; return; + case Instruction::LandingPad: NumFastIselFailLandingPad++; return; + } +} +#endif + +void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { + // Initialize the Fast-ISel state, if needed. + FastISel *FastIS = nullptr; + if (TM.Options.EnableFastISel) + FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + + // Iterate over all basic blocks in the function. + ReversePostOrderTraversal<const Function*> RPOT(&Fn); + for (ReversePostOrderTraversal<const Function*>::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + const BasicBlock *LLVMBB = *I; + + if (OptLevel != CodeGenOpt::None) { + bool AllPredsVisited = true; + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + if (!FuncInfo->VisitedBBs.count(*PI)) { + AllPredsVisited = false; + break; + } + } + + if (AllPredsVisited) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->ComputePHILiveOutRegInfo(PN); + } else { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + const PHINode *PN = dyn_cast<PHINode>(I); ++I) + FuncInfo->InvalidatePHILiveOutRegInfo(PN); + } + + FuncInfo->VisitedBBs.insert(LLVMBB); + } + + BasicBlock::const_iterator const Begin = + LLVMBB->getFirstNonPHI()->getIterator(); + BasicBlock::const_iterator const End = LLVMBB->end(); + BasicBlock::const_iterator BI = End; + + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + if (!FuncInfo->MBB) + continue; // Some blocks like catchpads have no code or MBB. + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. 
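+    // The exception pointer/selector vregs are reset for every block; they are
+    // only populated by PrepareEHLandingPad() below when this block is an EH
+    // pad.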
+ FuncInfo->ExceptionPointerVirtReg = 0; + FuncInfo->ExceptionSelectorVirtReg = 0; + if (LLVMBB->isEHPad()) + if (!PrepareEHLandingPad()) + continue; + + // Before doing SelectionDAG ISel, see if FastISel has been requested. + if (FastIS) { + FastIS->startNewBlock(); + + // Emit code for any incoming arguments. This must happen before + // beginning FastISel on the entry block. + if (LLVMBB == &Fn.getEntryBlock()) { + ++NumEntryBlocks; + + // Lower any arguments needed in this block if this is the entry block. + if (!FastIS->lowerArguments()) { + // Fast isel failed to lower these arguments + ++NumFastIselFailLowerArguments; + if (EnableFastISelAbort > 1) + report_fatal_error("FastISel didn't lower all arguments"); + + // Use SelectionDAG argument lowering + LowerArguments(Fn); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(std::prev(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(nullptr); + } + + unsigned NumFastIselRemaining = std::distance(Begin, End); + // Do FastISel on as many instructions as possible. + for (; BI != Begin; --BI) { + const Instruction *Inst = &*std::prev(BI); + + // If we no longer require this instruction, skip it. + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) { + --NumFastIselRemaining; + continue; + } + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + + // Try to select the instruction with FastISel. + if (FastIS->selectInstruction(Inst)) { + --NumFastIselRemaining; + ++NumFastIselSuccess; + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. + const Instruction *BeforeInst = Inst; + while (BeforeInst != &*Begin) { + BeforeInst = &*std::prev(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa<LoadInst>(BeforeInst) && + BeforeInst->hasOneUse() && + FastIS->tryToFoldLoad(cast<LoadInst>(BeforeInst), Inst)) { + // If we succeeded, don't re-select the load. + BI = std::next(BasicBlock::const_iterator(BeforeInst)); + --NumFastIselRemaining; + ++NumFastIselSuccess; + } + continue; + } + +#ifndef NDEBUG + if (EnableFastISelVerbose2) + collectFailStats(Inst); +#endif + + // Then handle certain instructions as single-LLVM-Instruction blocks. + if (isa<CallInst>(Inst)) { + + if (EnableFastISelVerbose || EnableFastISelAbort) { + dbgs() << "FastISel missed call: "; + Inst->dump(); + } + if (EnableFastISelAbort > 2) + // FastISel selector couldn't handle something and bailed. + // For the purpose of debugging, just abort. + report_fatal_error("FastISel didn't select the entire block"); + + if (!Inst->getType()->isVoidTy() && !Inst->getType()->isTokenTy() && + !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; + if (!R) + R = FuncInfo->CreateRegs(Inst->getType()); + } + + bool HadTailCall = false; + MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; + SelectBasicBlock(Inst->getIterator(), BI, HadTailCall); + + // If the call was emitted as a tail call, we're done with the block. + // We also need to delete any previously emitted instructions. 
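+          // Because FastISel walks the block bottom-up, machine code for the
+          // IR instructions after this call has already been emitted; a tail
+          // call makes all of it unreachable, so removeDeadCode() strips it.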
+          if (HadTailCall) {
+            FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end());
+            --BI;
+            break;
+          }
+
+          // Recompute NumFastIselRemaining as Selection DAG instruction
+          // selection may have handled the call, input args, etc.
+          unsigned RemainingNow = std::distance(Begin, BI);
+          NumFastIselFailures += NumFastIselRemaining - RemainingNow;
+          NumFastIselRemaining = RemainingNow;
+          continue;
+        }
+
+        bool ShouldAbort = EnableFastISelAbort;
+        if (EnableFastISelVerbose || EnableFastISelAbort) {
+          if (isa<TerminatorInst>(Inst)) {
+            // Use a different message for terminator misses.
+            dbgs() << "FastISel missed terminator: ";
+            // Don't abort for terminators unless the abort level is really high.
+            ShouldAbort = (EnableFastISelAbort > 2);
+          } else {
+            dbgs() << "FastISel miss: ";
+          }
+          Inst->dump();
+        }
+        if (ShouldAbort)
+          // FastISel selector couldn't handle something and bailed.
+          // For the purpose of debugging, just abort.
+          report_fatal_error("FastISel didn't select the entire block");
+
+        NumFastIselFailures += NumFastIselRemaining;
+        break;
+      }
+
+      FastIS->recomputeInsertPt();
+    } else {
+      // Lower any arguments needed in this block if this is the entry block.
+      if (LLVMBB == &Fn.getEntryBlock()) {
+        ++NumEntryBlocks;
+        LowerArguments(Fn);
+      }
+    }
+
+    if (Begin != BI)
+      ++NumDAGBlocks;
+    else
+      ++NumFastIselBlocks;
+
+    if (Begin != BI) {
+      // Run SelectionDAG instruction selection on the remainder of the block
+      // not handled by FastISel. If FastISel is not run, this is the entire
+      // block.
+      bool HadTailCall;
+      SelectBasicBlock(Begin, BI, HadTailCall);
+    }
+
+    FinishBasicBlock();
+    FuncInfo->PHINodesToUpdate.clear();
+  }
+
+  delete FastIS;
+  SDB->clearDanglingDebugInfo();
+  SDB->SPDescriptor.resetPerFunctionState();
+}
+
+/// Given that the input MI is before a partial terminator sequence TSeq, return
+/// true if MI + TSeq is also a partial terminator sequence.
+///
+/// A terminator sequence is a sequence of MachineInstrs which at this point in
+/// lowering copy vregs into physical registers, which are then passed into
+/// terminator instructions so we can satisfy ABI constraints. A partial
+/// terminator sequence is an improper subset of a terminator sequence (i.e. it
+/// may be the whole terminator sequence).
+static bool MIIsInTerminatorSequence(const MachineInstr *MI) {
+  // If we do not have a copy or an implicit def, we return true if and only if
+  // MI is a debug value.
+  if (!MI->isCopy() && !MI->isImplicitDef())
+    // Sometimes DBG_VALUE MIs sneak in between the copies from the vregs to the
+    // physical registers if there is debug info associated with the terminator
+    // of our mbb. We want to include said debug info in our terminator
+    // sequence, so we return true in that case.
+    return MI->isDebugValue();
+
+  // We have left the terminator sequence if we are not doing one of the
+  // following:
+  //
+  // 1. Copying a vreg into a physical register.
+  // 2. Copying a vreg into a vreg.
+  // 3. Defining a register via an implicit def.
+
+  // OPI should always be a register definition...
+  MachineInstr::const_mop_iterator OPI = MI->operands_begin();
+  if (!OPI->isReg() || !OPI->isDef())
+    return false;
+
+  // Defining any register via an implicit def is always ok.
+  if (MI->isImplicitDef())
+    return true;
+
+  // Grab the copy source...
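+  // (i.e. the second operand of the COPY). A COPY from a physical register
+  // into a virtual register is not part of the terminator sequence and is
+  // rejected by the check below.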
+ MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register. + if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + // + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; +} + +void +SelectionDAGISel::FinishBasicBlock() { + + DEBUG(dbgs() << "Total amount of phi nodes to update: " + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + dbgs() << "Node " << i << " : (" + << FuncInfo->PHINodesToUpdate[i].first + << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + + // Next, now that we know what the last MBB the LLVM BB expanded is, update + // PHI nodes in successors. + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) + continue; + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); + } + + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. + MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. 
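+    // Everything from SplitPoint to the end of ParentMBB (the terminator
+    // sequence located above) moves into SuccessMBB; the stack-protector
+    // compare-and-branch emitted below becomes ParentMBB's new terminator.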
+ SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->BitTestCases[i].Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + BranchProbability UnhandledProb = SDB->BitTestCases[i].Prob; + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { + UnhandledProb -= SDB->BitTestCases[i].Cases[j].ExtraProb; + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + + // If all cases cover a contiguous range, it is not necessary to jump to + // the default block after the last bit test fails. This is because the + // range check during bit test header creation has guaranteed that every + // case here doesn't go outside the range. + MachineBasicBlock *NextMBB; + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].TargetBB; + else if (j + 1 != ej) + NextMBB = SDB->BitTestCases[i].Cases[j + 1].ThisBB; + else + NextMBB = SDB->BitTestCases[i].Default; + + SDB->visitBitTestCase(SDB->BitTestCases[i], + NextMBB, + UnhandledProb, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j], + FuncInfo->MBB); + + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + if (SDB->BitTestCases[i].ContiguousRange && j + 2 == ej) + break; + } + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // This is "default" BB. We have two jumps to it. From "header" BB and + // from last "case" BB. + if (PHIBB == SDB->BitTestCases[i].Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Parent) + .addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); + // One of "cases" BB. + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); + j != ej; ++j) { + MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; + if (cBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); + } + } + } + SDB->BitTestCases.clear(); + + // If the JumpTable record is filled in, then we need to emit a jump table. 
+ // Updating the PHI nodes is tricky in this case, since we need to determine + // whether the PHI is a successor of the range check MBB or the jump table MBB + for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { + // Lower header first, if it wasn't already lowered + if (!SDB->JTCases[i].first.Emitted) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, + FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // Emit the code + SDB->visitJumpTable(SDB->JTCases[i].second); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Update PHI Nodes + for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); + pi != pe; ++pi) { + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); + MachineBasicBlock *PHIBB = PHI->getParent(); + assert(PHI->isPHI() && + "This is not a machine PHI node that we are updating!"); + // "default" BB. We can go there only from header BB. + if (PHIBB == SDB->JTCases[i].second.Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->JTCases[i].first.HeaderBB); + // JT BB. Just iterate over successors here + if (FuncInfo->MBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); + } + } + SDB->JTCases.clear(); + + // If we generated any switch lowering information, build and codegen any + // additional DAGs necessary. + for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { + // Set the current basic block to the mbb we wish to insert the code into + FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + + // Determine the unique successors. + SmallVector<MachineBasicBlock *, 2> Succs; + Succs.push_back(SDB->SwitchCases[i].TrueBB); + if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB) + Succs.push_back(SDB->SwitchCases[i].FalseBB); + + // Emit the code. Note that this could result in FuncInfo->MBB being split. + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // Remember the last block, now that any splitting is done, for use in + // populating PHI nodes in successors. + MachineBasicBlock *ThisBB = FuncInfo->MBB; + + // Handle any PHI nodes in successors of this chunk, as if we were coming + // from the original BB before switch expansion. Note that PHI nodes can + // occur multiple times in PHINodesToUpdate. We have to be very careful to + // handle them the right number of times. + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator + MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); + MBBI != MBBE && MBBI->isPHI(); ++MBBI) { + MachineInstrBuilder PHI(*MF, MBBI); + // This value for this PHI node is recorded in PHINodesToUpdate. 
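+          // Scan PHINodesToUpdate for the entry recorded for this PHI and add
+          // the incoming (register, ThisBB) pair; the assert below fires if
+          // no entry was recorded.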
+ for (unsigned pn = 0; ; ++pn) { + assert(pn != FuncInfo->PHINodesToUpdate.size() && + "Didn't find PHI entry!"); + if (FuncInfo->PHINodesToUpdate[pn].first == PHI) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); + break; + } + } + } + } + } + } + SDB->SwitchCases.clear(); +} + + +/// Create the scheduler. If a specific scheduler was specified +/// via the SchedulerRegistry, use it, otherwise select the +/// one preferred by the target. +/// +ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() { + return ISHeuristic(this, OptLevel); +} + +//===----------------------------------------------------------------------===// +// Helper functions used by the generated instruction selector. +//===----------------------------------------------------------------------===// +// Calls to these methods are generated by tblgen. + +/// CheckAndMask - The isel is trying to match something like (and X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an AND, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckAndMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + if (CurDAG->MaskedValueIsZero(LHS, NeededMask)) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// CheckOrMask - The isel is trying to match something like (or X, 255). If +/// the dag combiner simplified the 255, we still want to match. RHS is the +/// actual value in the DAG on the RHS of an OR, and DesiredMaskS is the value +/// specified in the .td file (e.g. 255). +bool SelectionDAGISel::CheckOrMask(SDValue LHS, ConstantSDNode *RHS, + int64_t DesiredMaskS) const { + const APInt &ActualMask = RHS->getAPIntValue(); + const APInt &DesiredMask = APInt(LHS.getValueSizeInBits(), DesiredMaskS); + + // If the actual mask exactly matches, success! + if (ActualMask == DesiredMask) + return true; + + // If the actual AND mask is allowing unallowed bits, this doesn't match. + if (ActualMask.intersects(~DesiredMask)) + return false; + + // Otherwise, the DAG Combiner may have proven that the value coming in is + // either already zero or is not demanded. Check for known zero input bits. + APInt NeededMask = DesiredMask & ~ActualMask; + + APInt KnownZero, KnownOne; + CurDAG->computeKnownBits(LHS, KnownZero, KnownOne); + + // If all the missing bits in the or are already known to be set, match! + if ((NeededMask & KnownOne) == NeededMask) + return true; + + // TODO: check to see if missing bits are just not demanded. + + // Otherwise, this pattern doesn't match. + return false; +} + +/// SelectInlineAsmMemoryOperands - Calls to this are automatically generated +/// by tblgen. Others should not call it. 
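+/// The memory-constrained operands of the INLINEASM node are rewritten into
+/// target-selected address operands; all other operands are copied through
+/// unchanged.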
+void SelectionDAGISel:: +SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops, SDLoc DL) { + std::vector<SDValue> InOps; + std::swap(InOps, Ops); + + Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 + Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 + Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack) + + unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); + if (InOps[e-1].getValueType() == MVT::Glue) + --e; // Don't process a glue operand if it is here. + + while (i != e) { + unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue(); + if (!InlineAsm::isMemKind(Flags)) { + // Just skip over this operand, copying the operands verbatim. + Ops.insert(Ops.end(), InOps.begin()+i, + InOps.begin()+i+InlineAsm::getNumOperandRegisters(Flags) + 1); + i += InlineAsm::getNumOperandRegisters(Flags) + 1; + } else { + assert(InlineAsm::getNumOperandRegisters(Flags) == 1 && + "Memory operand with multiple values?"); + + unsigned TiedToOperand; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedToOperand)) { + // We need the constraint ID from the operand this is tied to. + unsigned CurOp = InlineAsm::Op_FirstOperand; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + for (; TiedToOperand; --TiedToOperand) { + CurOp += InlineAsm::getNumOperandRegisters(Flags)+1; + Flags = cast<ConstantSDNode>(InOps[CurOp])->getZExtValue(); + } + } + + // Otherwise, this is a memory operand. Ask the target to select it. + std::vector<SDValue> SelOps; + if (SelectInlineAsmMemoryOperand(InOps[i+1], + InlineAsm::getMemoryConstraintID(Flags), + SelOps)) + report_fatal_error("Could not match memory address. Inline asm" + " failure!"); + + // Add this to the output node. + unsigned NewFlags = + InlineAsm::getFlagWord(InlineAsm::Kind_Mem, SelOps.size()); + Ops.push_back(CurDAG->getTargetConstant(NewFlags, DL, MVT::i32)); + Ops.insert(Ops.end(), SelOps.begin(), SelOps.end()); + i += 2; + } + } + + // Add the glue input back if present. + if (e != InOps.size()) + Ops.push_back(InOps.back()); +} + +/// findGlueUse - Return use of MVT::Glue value produced by the specified +/// SDNode. +/// +static SDNode *findGlueUse(SDNode *N) { + unsigned FlagResNo = N->getNumValues()-1; + for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) { + SDUse &Use = I.getUse(); + if (Use.getResNo() == FlagResNo) + return Use.getUser(); + } + return nullptr; +} + +/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def". +/// This function recursively traverses up the operand chain, ignoring +/// certain nodes. +static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse, + SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited, + bool IgnoreChains) { + // The NodeID's are given uniques ID's where a node ID is guaranteed to be + // greater than all of its (recursive) operands. If we scan to a point where + // 'use' is smaller than the node we're scanning for, then we know we will + // never find it. + // + // The Use may be -1 (unassigned) if it is a newly allocated node. This can + // happen because we scan down to newly selected nodes in the case of glue + // uses. + if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)) + return false; + + // Don't revisit nodes if we already scanned it and didn't fail, we know we + // won't fail if we scan it again. 
+  if (!Visited.insert(Use).second)
+    return false;
+
+  for (const SDValue &Op : Use->op_values()) {
+    // Ignore chain uses, they are validated by HandleMergeInputChains.
+    if (Op.getValueType() == MVT::Other && IgnoreChains)
+      continue;
+
+    SDNode *N = Op.getNode();
+    if (N == Def) {
+      if (Use == ImmedUse || Use == Root)
+        continue; // We are not looking for immediate use.
+      assert(N != Root);
+      return true;
+    }
+
+    // Traverse up the operand chain.
+    if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
+      return true;
+  }
+  return false;
+}
+
+/// IsProfitableToFold - Returns true if it's profitable to fold the specific
+/// operand node N of U during instruction selection that starts at Root.
+bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
+                                          SDNode *Root) const {
+  if (OptLevel == CodeGenOpt::None) return false;
+  return N.hasOneUse();
+}
+
+/// IsLegalToFold - Returns true if the specific operand node N of
+/// U can be folded during instruction selection that starts at Root.
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+                                     CodeGenOpt::Level OptLevel,
+                                     bool IgnoreChains) {
+  if (OptLevel == CodeGenOpt::None) return false;
+
+  // If Root use can somehow reach N through a path that doesn't contain
+  // U then folding N would create a cycle. e.g., in the following
+  // diagram, Root can reach N through X. If N is folded into Root, then
+  // X is both a predecessor and a successor of U.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^     ^          //
+  //         \   /           //
+  //          \ /            //
+  //         [Root*]         //
+  //
+  // * indicates nodes to be folded together.
+  //
+  // If Root produces glue, then it gets (even more) interesting. Since it
+  // will be "glued" together with its glue use in the scheduler, we need to
+  // check if it might reach N.
+  //
+  //          [N*]           //
+  //         ^   ^           //
+  //        /     \          //
+  //      [U*]    [X]?       //
+  //        ^       ^        //
+  //         \       \       //
+  //          \       |      //
+  //         [Root*]  |      //
+  //          ^       |      //
+  //          f       |      //
+  //          |       /      //
+  //         [Y]     /       //
+  //           ^    /        //
+  //           f   /         //
+  //           |  /          //
+  //          [GU]           //
+  //
+  // If GU (glue use) indirectly reaches N (the load), and Root folds N
+  // (call it Fold), then X is a predecessor of GU and a successor of
+  // Fold. But since Fold and GU are glued together, this will create
+  // a cycle in the scheduling graph.
+
+  // If the node has glue, walk down the graph to the "lowest" node in the
+  // glued set.
+  EVT VT = Root->getValueType(Root->getNumValues()-1);
+  while (VT == MVT::Glue) {
+    SDNode *GU = findGlueUse(Root);
+    if (!GU)
+      break;
+    Root = GU;
+    VT = Root->getValueType(Root->getNumValues()-1);
+
+    // If our query node has a glue result with a use, we've walked up it. If
+    // the user (which has already been selected) has a chain or indirectly uses
+    // the chain, our WalkChainUsers predicate will not consider it. Because of
+    // this, we cannot ignore chains in this predicate.
+ IgnoreChains = false; + } + + + SmallPtrSet<SDNode*, 16> Visited; + return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains); +} + +SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { + SDLoc DL(N); + + std::vector<SDValue> Ops(N->op_begin(), N->op_end()); + SelectInlineAsmMemoryOperands(Ops, DL); + + const EVT VTs[] = {MVT::Other, MVT::Glue}; + SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = + TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0), + *CurDAG); + SDValue New = CurDAG->getCopyFromReg( + Op->getOperand(0), dl, Reg, Op->getValueType(0)); + New->setNodeId(-1); + return New.getNode(); +} + +SDNode +*SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) { + SDLoc dl(Op); + MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1)); + const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0)); + unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(), + Op->getOperand(2).getValueType(), + *CurDAG); + SDValue New = CurDAG->getCopyToReg( + Op->getOperand(0), dl, Reg, Op->getOperand(2)); + New->setNodeId(-1); + return New.getNode(); +} + + + +SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) { + return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF,N->getValueType(0)); +} + +/// GetVBR - decode a vbr encoding whose top bit is set. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline uint64_t +GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) { + assert(Val >= 128 && "Not a VBR"); + Val &= 127; // Remove first vbr bit. + + unsigned Shift = 7; + uint64_t NextBits; + do { + NextBits = MatcherTable[Idx++]; + Val |= (NextBits&127) << Shift; + Shift += 7; + } while (NextBits & 128); + + return Val; +} + + +/// UpdateChainsAndGlue - When a match is complete, this method updates uses of +/// interior glue and chain results to use the new glue and chain results. +void SelectionDAGISel:: +UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain, + const SmallVectorImpl<SDNode*> &ChainNodesMatched, + SDValue InputGlue, + const SmallVectorImpl<SDNode*> &GlueResultNodesMatched, + bool isMorphNodeTo) { + SmallVector<SDNode*, 4> NowDeadNodes; + + // Now that all the normal results are replaced, we replace the chain and + // glue results if present. + if (!ChainNodesMatched.empty()) { + assert(InputChain.getNode() && + "Matched input chains but didn't produce a chain"); + // Loop over all of the nodes we matched that produced a chain result. + // Replace all the chain results with the final chain we ended up with. + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + SDNode *ChainNode = ChainNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (ChainNode->getOpcode() == ISD::DELETED_NODE) + continue; + + // Don't replace the results of the root node if we're doing a + // MorphNodeTo. 
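+      // When morphing, NodeToMatch is updated in place and its chain result is
+      // already the final chain of the new node, so it must not be redirected
+      // to InputChain here.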
+ if (ChainNode == NodeToMatch && isMorphNodeTo) + continue; + + SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1); + if (ChainVal.getValueType() == MVT::Glue) + ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2); + assert(ChainVal.getValueType() == MVT::Other && "Not a chain?"); + CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain); + + // If the node became dead and we haven't already seen it, delete it. + if (ChainNode->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode)) + NowDeadNodes.push_back(ChainNode); + } + } + + // If the result produces glue, update any glue results in the matched + // pattern with the glue result. + if (InputGlue.getNode()) { + // Handle any interior nodes explicitly marked. + for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) { + SDNode *FRN = GlueResultNodesMatched[i]; + + // If this node was already deleted, don't look at it. + if (FRN->getOpcode() == ISD::DELETED_NODE) + continue; + + assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue && + "Doesn't have a glue result"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1), + InputGlue); + + // If the node became dead and we haven't already seen it, delete it. + if (FRN->use_empty() && + !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN)) + NowDeadNodes.push_back(FRN); + } + } + + if (!NowDeadNodes.empty()) + CurDAG->RemoveDeadNodes(NowDeadNodes); + + DEBUG(dbgs() << "ISEL: Match complete!\n"); +} + +enum ChainResult { + CR_Simple, + CR_InducesCycle, + CR_LeadsToInteriorNode +}; + +/// WalkChainUsers - Walk down the users of the specified chained node that is +/// part of the pattern we're matching, looking at all of the users we find. +/// This determines whether something is an interior node, whether we have a +/// non-pattern node in between two pattern nodes (which prevent folding because +/// it would induce a cycle) and whether we have a TokenFactor node sandwiched +/// between pattern nodes (in which case the TF becomes part of the pattern). +/// +/// The walk we do here is guaranteed to be small because we quickly get down to +/// already selected nodes "below" us. +static ChainResult +WalkChainUsers(const SDNode *ChainedNode, + SmallVectorImpl<SDNode*> &ChainedNodesInPattern, + SmallVectorImpl<SDNode*> &InteriorChainedNodes) { + ChainResult Result = CR_Simple; + + for (SDNode::use_iterator UI = ChainedNode->use_begin(), + E = ChainedNode->use_end(); UI != E; ++UI) { + // Make sure the use is of the chain, not some other value we produce. + if (UI.getUse().getValueType() != MVT::Other) continue; + + SDNode *User = *UI; + + if (User->getOpcode() == ISD::HANDLENODE) // Root of the graph. + continue; + + // If we see an already-selected machine node, then we've gone beyond the + // pattern that we're selecting down into the already selected chunk of the + // DAG. + unsigned UserOpcode = User->getOpcode(); + if (User->isMachineOpcode() || + UserOpcode == ISD::CopyToReg || + UserOpcode == ISD::CopyFromReg || + UserOpcode == ISD::INLINEASM || + UserOpcode == ISD::EH_LABEL || + UserOpcode == ISD::LIFETIME_START || + UserOpcode == ISD::LIFETIME_END) { + // If their node ID got reset to -1 then they've already been selected. + // Treat them like a MachineOpcode. + if (User->getNodeId() == -1) + continue; + } + + // If we have a TokenFactor, we handle it specially. 
+    if (User->getOpcode() != ISD::TokenFactor) {
+      // If the node isn't a token factor and isn't part of our pattern, then it
+      // must be a random chained node in between two nodes we're selecting.
+      // This happens when we have something like:
+      //   x = load ptr
+      //   call
+      //   y = x+4
+      //   store y -> ptr
+      // Because we structurally match the load/store as a read/modify/write,
+      // but the call is chained between them. We cannot fold in this case
+      // because it would induce a cycle in the graph.
+      if (!std::count(ChainedNodesInPattern.begin(),
+                      ChainedNodesInPattern.end(), User))
+        return CR_InducesCycle;
+
+      // Otherwise we found a node that is part of our pattern. For example in:
+      //   x = load ptr
+      //   y = x+4
+      //   store y -> ptr
+      // This would happen when we're scanning down from the load and see the
+      // store as a user. Record that there is a use of ChainedNode that is
+      // part of the pattern and keep scanning uses.
+      Result = CR_LeadsToInteriorNode;
+      InteriorChainedNodes.push_back(User);
+      continue;
+    }
+
+    // If we found a TokenFactor, there are two cases to consider: first if the
+    // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+    // uses of the TF are in our pattern) we just want to ignore it. Second,
+    // the TokenFactor can be sandwiched in between two chained nodes, like so:
+    //     [Load chain]
+    //         ^
+    //         |
+    //       [Load]
+    //       ^    ^
+    //       |    \                    DAG's like cheese
+    //      /       \                       do you?
+    //     /         |
+    // [TokenFactor] [Op]
+    //     ^          ^
+    //     |          |
+    //      \        /
+    //       \      /
+    //       [Store]
+    //
+    // In this case, the TokenFactor becomes part of our match and we rewrite it
+    // as a new TokenFactor.
+    //
+    // To distinguish these two cases, do a recursive walk down the uses.
+    switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+    case CR_Simple:
+      // If the uses of the TokenFactor are just already-selected nodes, ignore
+      // it, it is "below" our pattern.
+      continue;
+    case CR_InducesCycle:
+      // If the uses of the TokenFactor lead to nodes that are not part of our
+      // pattern and are not selected, folding would turn this into a cycle,
+      // bail out now.
+      return CR_InducesCycle;
+    case CR_LeadsToInteriorNode:
+      break; // Otherwise, keep processing.
+    }
+
+    // Okay, we know we're in the interesting interior case. The TokenFactor
+    // is now going to be considered part of the pattern so that we rewrite its
+    // uses (it may have uses that are not part of the pattern) with the
+    // ultimate chain result of the generated code. We will also add its chain
+    // inputs as inputs to the ultimate TokenFactor we create.
+    Result = CR_LeadsToInteriorNode;
+    ChainedNodesInPattern.push_back(User);
+    InteriorChainedNodes.push_back(User);
+    continue;
+  }
+
+  return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+                       SelectionDAG *CurDAG) {
+  // Walk all of the chained nodes we've matched, recursively scanning down the
+  // users of the chain result. This adds any TokenFactor nodes that are caught
+  // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched, + InteriorChainedNodes) == CR_InducesCycle) + return SDValue(); // Would induce a cycle. + } + + // Okay, we have walked all the matched nodes and collected TokenFactor nodes + // that we are interested in. Form our input TokenFactor node. + SmallVector<SDValue, 3> InputChains; + for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) { + // Add the input chain of this node to the InputChains list (which will be + // the operands of the generated TokenFactor) if it's not an interior node. + SDNode *N = ChainNodesMatched[i]; + if (N->getOpcode() != ISD::TokenFactor) { + if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N)) + continue; + + // Otherwise, add the input chain. + SDValue InChain = ChainNodesMatched[i]->getOperand(0); + assert(InChain.getValueType() == MVT::Other && "Not a chain"); + InputChains.push_back(InChain); + continue; + } + + // If we have a token factor, we want to add all inputs of the token factor + // that are not part of the pattern we're matching. + for (const SDValue &Op : N->op_values()) { + if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(), + Op.getNode())) + InputChains.push_back(Op); + } + } + + if (InputChains.size() == 1) + return InputChains[0]; + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), + MVT::Other, InputChains); +} + +/// MorphNode - Handle morphing a node in place for the selector. +SDNode *SelectionDAGISel:: +MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, + ArrayRef<SDValue> Ops, unsigned EmitNodeInfo) { + // It is possible we're using MorphNodeTo to replace a node with no + // normal results with one that has a normal result (or we could be + // adding a chain) and the input could have glue and chains as well. + // In this case we need to shift the operands down. + // FIXME: This is a horrible hack and broken in obscure cases, no worse + // than the old isel though. + int OldGlueResultNo = -1, OldChainResultNo = -1; + + unsigned NTMNumResults = Node->getNumValues(); + if (Node->getValueType(NTMNumResults-1) == MVT::Glue) { + OldGlueResultNo = NTMNumResults-1; + if (NTMNumResults != 1 && + Node->getValueType(NTMNumResults-2) == MVT::Other) + OldChainResultNo = NTMNumResults-2; + } else if (Node->getValueType(NTMNumResults-1) == MVT::Other) + OldChainResultNo = NTMNumResults-1; + + // Call the underlying SelectionDAG routine to do the transmogrification. Note + // that this deletes operands of the old node that become dead. + SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops); + + // MorphNodeTo can operate in two ways: if an existing node with the + // specified operands exists, it can just return it. Otherwise, it + // updates the node in place to have the requested operands. + if (Res == Node) { + // If we updated the node in place, reset the node ID. To the isel, + // this should be just like a newly allocated machine node. + Res->setNodeId(-1); + } + + unsigned ResNumResults = Res->getNumValues(); + // Move the glue if needed. + if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 && + (unsigned)OldGlueResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo), + SDValue(Res, ResNumResults-1)); + + if ((EmitNodeInfo & OPFL_GlueOutput) != 0) + --ResNumResults; + + // Move the chain reference if needed. 
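+  // This mirrors the glue handling above: if the morphed node's chain result
+  // now lives at a different result number, redirect uses of the old chain
+  // value to the new position.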
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && + (unsigned)OldChainResultNo != ResNumResults-1) + CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo), + SDValue(Res, ResNumResults-1)); + + // Otherwise, no replacement happened because the node already exists. Replace + // Uses of the old node with the new one. + if (Res != Node) + CurDAG->ReplaceAllUsesWith(Node, Res); + + return Res; +} + +/// CheckSame - Implements OP_CheckSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { + // Accept if it is exactly the same as a previously recorded node. + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + return N == RecordedNodes[RecNo].first; +} + +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + +/// CheckPatternPredicate - Implements OP_CheckPatternPredicate. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel) { + return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]); +} + +/// CheckNodePredicate - Implements OP_CheckNodePredicate. +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, + const SelectionDAGISel &SDISel, SDNode *N) { + return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDNode *N) { + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + return N->getOpcode() == Opc; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, + const TargetLowering *TLI, const DataLayout &DL) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (N.getValueType() == VT) return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering *TLI, const DataLayout &DL, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. 
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI, + DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + return cast<CondCodeSDNode>(N)->get() == + (ISD::CondCode)MatcherTable[MatcherIndex++]; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const TargetLowering *TLI, const DataLayout &DL) { + MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (cast<VTSDNode>(N)->getVT() == VT) + return true; + + // Handle the case when VT is iPTR. + return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy(DL); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); + return C && C->getSExtValue() == Val; +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckChildInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckInteger(MatcherTable, MatcherIndex, N.getOperand(ChildNo)); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::AND) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C && SDISel.CheckAndMask(N.getOperand(0), C, Val); +} + +LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool +CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, const SelectionDAGISel &SDISel) { + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + + if (N->getOpcode() != ISD::OR) return false; + + ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); + return C && SDISel.CheckOrMask(N.getOperand(0), C, Val); +} + +/// IsPredicateKnownToFail - If we know how and can do so without pushing a +/// scope, evaluate the current node. If the current predicate is known to +/// fail, set Result=true and return anything. If the current predicate is +/// known to pass, set Result=false and return the MatcherIndex to continue +/// with. If the current predicate is unknown, set Result=false and return the +/// MatcherIndex to continue with. +static unsigned IsPredicateKnownToFail(const unsigned char *Table, + unsigned Index, SDValue N, + bool &Result, + const SelectionDAGISel &SDISel, + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) { + switch (Table[Index++]) { + default: + Result = false; + return Index-1; // Could not evaluate this predicate. 
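+  // The cases below are the predicates that can be evaluated eagerly: each
+  // one only inspects N (or a previously recorded node) and reads its
+  // operands directly from the table, so no MatchScope has to be pushed
+  // just to test it.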
+  case SelectionDAGISel::OPC_CheckSame:
+    Result = !::CheckSame(Table, Index, N, RecordedNodes);
+    return Index;
+  case SelectionDAGISel::OPC_CheckChild0Same:
+  case SelectionDAGISel::OPC_CheckChild1Same:
+  case SelectionDAGISel::OPC_CheckChild2Same:
+  case SelectionDAGISel::OPC_CheckChild3Same:
+    Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+                        Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPatternPredicate:
+    Result = !::CheckPatternPredicate(Table, Index, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckPredicate:
+    Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckOpcode:
+    Result = !::CheckOpcode(Table, Index, N.getNode());
+    return Index;
+  case SelectionDAGISel::OPC_CheckType:
+    Result = !::CheckType(Table, Index, N, SDISel.TLI,
+                          SDISel.CurDAG->getDataLayout());
+    return Index;
+  case SelectionDAGISel::OPC_CheckChild0Type:
+  case SelectionDAGISel::OPC_CheckChild1Type:
+  case SelectionDAGISel::OPC_CheckChild2Type:
+  case SelectionDAGISel::OPC_CheckChild3Type:
+  case SelectionDAGISel::OPC_CheckChild4Type:
+  case SelectionDAGISel::OPC_CheckChild5Type:
+  case SelectionDAGISel::OPC_CheckChild6Type:
+  case SelectionDAGISel::OPC_CheckChild7Type:
+    Result = !::CheckChildType(
+        Table, Index, N, SDISel.TLI, SDISel.CurDAG->getDataLayout(),
+        Table[Index - 1] - SelectionDAGISel::OPC_CheckChild0Type);
+    return Index;
+  case SelectionDAGISel::OPC_CheckCondCode:
+    Result = !::CheckCondCode(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckValueType:
+    Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
+                               SDISel.CurDAG->getDataLayout());
+    return Index;
+  case SelectionDAGISel::OPC_CheckInteger:
+    Result = !::CheckInteger(Table, Index, N);
+    return Index;
+  case SelectionDAGISel::OPC_CheckChild0Integer:
+  case SelectionDAGISel::OPC_CheckChild1Integer:
+  case SelectionDAGISel::OPC_CheckChild2Integer:
+  case SelectionDAGISel::OPC_CheckChild3Integer:
+  case SelectionDAGISel::OPC_CheckChild4Integer:
+    Result = !::CheckChildInteger(Table, Index, N,
+                     Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Integer);
+    return Index;
+  case SelectionDAGISel::OPC_CheckAndImm:
+    Result = !::CheckAndImm(Table, Index, N, SDISel);
+    return Index;
+  case SelectionDAGISel::OPC_CheckOrImm:
+    Result = !::CheckOrImm(Table, Index, N, SDISel);
+    return Index;
+  }
+}
+
+namespace {
+
+struct MatchScope {
+  /// FailIndex - If this match fails, this is the index to continue with.
+  unsigned FailIndex;
+
+  /// NodeStack - The node stack when the scope was formed.
+  SmallVector<SDValue, 4> NodeStack;
+
+  /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+  unsigned NumRecordedNodes;
+
+  /// NumMatchedMemRefs - The number of matched memref entries.
+  unsigned NumMatchedMemRefs;
+
+  /// InputChain/InputGlue - The current chain/glue
+  SDValue InputChain, InputGlue;
+
+  /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+  bool HasChainNodesMatched, HasGlueResultNodesMatched;
+};
+
+/// \brief A DAG update listener to keep the matching state
+/// (i.e. RecordedNodes and MatchScope) up to date if the target is allowed to
+/// change the DAG while matching. X86 addressing mode matcher is an example
+/// for this.
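+/// When a node is CSE'd away, the listener retargets the SDValue handles held
+/// in RecordedNodes and in the NodeStack copy saved by each MatchScope to the
+/// replacement node.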
+class MatchStateUpdater : public SelectionDAG::DAGUpdateListener +{ + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes; + SmallVectorImpl<MatchScope> &MatchScopes; +public: + MatchStateUpdater(SelectionDAG &DAG, + SmallVectorImpl<std::pair<SDValue, SDNode*> > &RN, + SmallVectorImpl<MatchScope> &MS) : + SelectionDAG::DAGUpdateListener(DAG), + RecordedNodes(RN), MatchScopes(MS) { } + + void NodeDeleted(SDNode *N, SDNode *E) override { + // Some early-returns here to avoid the search if we deleted the node or + // if the update comes from MorphNodeTo (MorphNodeTo is the last thing we + // do, so it's unnecessary to update matching state at that point). + // Neither of these can occur currently because we only install this + // update listener during matching a complex patterns. + if (!E || E->isMachineOpcode()) + return; + // Performing linear search here does not matter because we almost never + // run this code. You'd have to have a CSE during complex pattern + // matching. + for (auto &I : RecordedNodes) + if (I.first.getNode() == N) + I.first.setNode(E); + + for (auto &I : MatchScopes) + for (auto &J : I.NodeStack) + if (J.getNode() == N) + J.setNode(E); + } +}; +} + +SDNode *SelectionDAGISel:: +SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, + unsigned TableSize) { + // FIXME: Should these even be selected? Handle these cases in the caller? + switch (NodeToMatch->getOpcode()) { + default: + break; + case ISD::EntryToken: // These nodes remain the same. + case ISD::BasicBlock: + case ISD::Register: + case ISD::RegisterMask: + case ISD::HANDLENODE: + case ISD::MDNODE_SDNODE: + case ISD::TargetConstant: + case ISD::TargetConstantFP: + case ISD::TargetConstantPool: + case ISD::TargetFrameIndex: + case ISD::TargetExternalSymbol: + case ISD::MCSymbol: + case ISD::TargetBlockAddress: + case ISD::TargetJumpTable: + case ISD::TargetGlobalTLSAddress: + case ISD::TargetGlobalAddress: + case ISD::TokenFactor: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::EH_LABEL: + case ISD::LIFETIME_START: + case ISD::LIFETIME_END: + NodeToMatch->setNodeId(-1); // Mark selected. + return nullptr; + case ISD::AssertSext: + case ISD::AssertZext: + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0), + NodeToMatch->getOperand(0)); + return nullptr; + case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch); + case ISD::READ_REGISTER: return Select_READ_REGISTER(NodeToMatch); + case ISD::WRITE_REGISTER: return Select_WRITE_REGISTER(NodeToMatch); + case ISD::UNDEF: return Select_UNDEF(NodeToMatch); + } + + assert(!NodeToMatch->isMachineOpcode() && "Node already selected!"); + + // Set up the node stack with NodeToMatch as the only node on the stack. + SmallVector<SDValue, 8> NodeStack; + SDValue N = SDValue(NodeToMatch, 0); + NodeStack.push_back(N); + + // MatchScopes - Scopes used when matching, if a match failure happens, this + // indicates where to continue checking. + SmallVector<MatchScope, 8> MatchScopes; + + // RecordedNodes - This is the set of nodes that have been recorded by the + // state machine. The second value is the parent of the node, or null if the + // root is recorded. + SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes; + + // MatchedMemRefs - This is the set of MemRef's we've seen in the input + // pattern. + SmallVector<MachineMemOperand*, 2> MatchedMemRefs; + + // These are the current input chain and glue for use when generating nodes. + // Various Emit operations change these. 
For example, emitting a copytoreg + // uses and updates these. + SDValue InputChain, InputGlue; + + // ChainNodesMatched - If a pattern matches nodes that have input/output + // chains, the OPC_EmitMergeInputChains operation is emitted which indicates + // which ones they are. The result is captured into this list so that we can + // update the chain results when the pattern is complete. + SmallVector<SDNode*, 3> ChainNodesMatched; + SmallVector<SDNode*, 3> GlueResultNodesMatched; + + DEBUG(dbgs() << "ISEL: Starting pattern match on root node: "; + NodeToMatch->dump(CurDAG); + dbgs() << '\n'); + + // Determine where to start the interpreter. Normally we start at opcode #0, + // but if the state machine starts with an OPC_SwitchOpcode, then we + // accelerate the first lookup (which is guaranteed to be hot) with the + // OpcodeOffset table. + unsigned MatcherIndex = 0; + + if (!OpcodeOffset.empty()) { + // Already computed the OpcodeOffset table, just index into it. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + DEBUG(dbgs() << " Initial Opcode index to " << MatcherIndex << "\n"); + + } else if (MatcherTable[0] == OPC_SwitchOpcode) { + // Otherwise, the table isn't computed, but the state machine does start + // with an OPC_SwitchOpcode instruction. Populate the table now, since this + // is the first time we're selecting an instruction. + unsigned Idx = 1; + while (1) { + // Get the size of this case. + unsigned CaseSize = MatcherTable[Idx++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, Idx); + if (CaseSize == 0) break; + + // Get the opcode, add the index to the table. + uint16_t Opc = MatcherTable[Idx++]; + Opc |= (unsigned short)MatcherTable[Idx++] << 8; + if (Opc >= OpcodeOffset.size()) + OpcodeOffset.resize((Opc+1)*2); + OpcodeOffset[Opc] = Idx; + Idx += CaseSize; + } + + // Okay, do the lookup for the first opcode. + if (N.getOpcode() < OpcodeOffset.size()) + MatcherIndex = OpcodeOffset[N.getOpcode()]; + } + + while (1) { + assert(MatcherIndex < TableSize && "Invalid index"); +#ifndef NDEBUG + unsigned CurrentOpcodeIndex = MatcherIndex; +#endif + BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++]; + switch (Opcode) { + case OPC_Scope: { + // Okay, the semantics of this operation are that we should push a scope + // then evaluate the first child. However, pushing a scope only to have + // the first check fail (which then pops it) is inefficient. If we can + // determine immediately that the first check (or first several) will + // immediately fail, don't even bother pushing a scope for them. + unsigned FailIndex; + + while (1) { + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + // Found the end of the scope with no match. + if (NumToSkip == 0) { + FailIndex = 0; + break; + } + + FailIndex = MatcherIndex+NumToSkip; + + unsigned MatcherIndexOfPredicate = MatcherIndex; + (void)MatcherIndexOfPredicate; // silence warning. + + // If we can't evaluate this predicate without pushing a scope (e.g. if + // it is a 'MoveParent') or if the predicate succeeds on this node, we + // push the scope and evaluate the full predicate chain. 
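+        // A false Result below means the predicate either passed or could not
+        // be evaluated up front; in both cases we stop scanning and fall
+        // through to push the scope.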
+ bool Result; + MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N, + Result, *this, RecordedNodes); + if (!Result) + break; + + DEBUG(dbgs() << " Skipped scope entry (due to false predicate) at " + << "index " << MatcherIndexOfPredicate + << ", continuing at " << FailIndex << "\n"); + ++NumDAGIselRetries; + + // Otherwise, we know that this case of the Scope is guaranteed to fail, + // move to the next case. + MatcherIndex = FailIndex; + } + + // If the whole scope failed to match, bail. + if (FailIndex == 0) break; + + // Push a MatchScope which indicates where to go if the first child fails + // to match. + MatchScope NewEntry; + NewEntry.FailIndex = FailIndex; + NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end()); + NewEntry.NumRecordedNodes = RecordedNodes.size(); + NewEntry.NumMatchedMemRefs = MatchedMemRefs.size(); + NewEntry.InputChain = InputChain; + NewEntry.InputGlue = InputGlue; + NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty(); + NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty(); + MatchScopes.push_back(NewEntry); + continue; + } + case OPC_RecordNode: { + // Remember this node, it may end up being an operand in the pattern. + SDNode *Parent = nullptr; + if (NodeStack.size() > 1) + Parent = NodeStack[NodeStack.size()-2].getNode(); + RecordedNodes.push_back(std::make_pair(N, Parent)); + continue; + } + + case OPC_RecordChild0: case OPC_RecordChild1: + case OPC_RecordChild2: case OPC_RecordChild3: + case OPC_RecordChild4: case OPC_RecordChild5: + case OPC_RecordChild6: case OPC_RecordChild7: { + unsigned ChildNo = Opcode-OPC_RecordChild0; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + + RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo), + N.getNode())); + continue; + } + case OPC_RecordMemRef: + MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand()); + continue; + + case OPC_CaptureGlueInput: + // If the current node has an input glue, capture it in InputGlue. + if (N->getNumOperands() != 0 && + N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) + InputGlue = N->getOperand(N->getNumOperands()-1); + continue; + + case OPC_MoveChild: { + unsigned ChildNo = MatcherTable[MatcherIndex++]; + if (ChildNo >= N.getNumOperands()) + break; // Match fails if out of range child #. + N = N.getOperand(ChildNo); + NodeStack.push_back(N); + continue; + } + + case OPC_MoveParent: + // Pop the current node off the NodeStack. + NodeStack.pop_back(); + assert(!NodeStack.empty() && "Node stack imbalance!"); + N = NodeStack.back(); + continue; + + case OPC_CheckSame: + if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break; + continue; + + case OPC_CheckChild0Same: case OPC_CheckChild1Same: + case OPC_CheckChild2Same: case OPC_CheckChild3Same: + if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes, + Opcode-OPC_CheckChild0Same)) + break; + continue; + + case OPC_CheckPatternPredicate: + if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break; + continue; + case OPC_CheckPredicate: + if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this, + N.getNode())) + break; + continue; + case OPC_CheckComplexPat: { + unsigned CPNum = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat"); + + // If target can modify DAG during matching, keep the matching state + // consistent. 
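+      // The listener only lives for the duration of this case; it is torn
+      // down again before the next matcher opcode is interpreted.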
+ std::unique_ptr<MatchStateUpdater> MSU; + if (ComplexPatternFuncMutatesDAG()) + MSU.reset(new MatchStateUpdater(*CurDAG, RecordedNodes, + MatchScopes)); + + if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second, + RecordedNodes[RecNo].first, CPNum, + RecordedNodes)) + break; + continue; + } + case OPC_CheckOpcode: + if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break; + continue; + + case OPC_CheckType: + if (!::CheckType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) + break; + continue; + + case OPC_SwitchOpcode: { + unsigned CurNodeOpcode = N.getOpcode(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + uint16_t Opc = MatcherTable[MatcherIndex++]; + Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + + // If the opcode matches, then we will execute this case. + if (CurNodeOpcode == Opc) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. + DEBUG(dbgs() << " OpcodeSwitch from " << SwitchStart + << " to " << MatcherIndex << "\n"); + continue; + } + + case OPC_SwitchType: { + MVT CurNodeVT = N.getSimpleValueType(); + unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; + unsigned CaseSize; + while (1) { + // Get the size of this case. + CaseSize = MatcherTable[MatcherIndex++]; + if (CaseSize & 128) + CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex); + if (CaseSize == 0) break; + + MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (CaseVT == MVT::iPTR) + CaseVT = TLI->getPointerTy(CurDAG->getDataLayout()); + + // If the VT matches, then we will execute this case. + if (CurNodeVT == CaseVT) + break; + + // Otherwise, skip over this case. + MatcherIndex += CaseSize; + } + + // If no cases matched, bail out. + if (CaseSize == 0) break; + + // Otherwise, execute the case we found. 
+ DEBUG(dbgs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString() + << "] from " << SwitchStart << " to " << MatcherIndex<<'\n'); + continue; + } + case OPC_CheckChild0Type: case OPC_CheckChild1Type: + case OPC_CheckChild2Type: case OPC_CheckChild3Type: + case OPC_CheckChild4Type: case OPC_CheckChild5Type: + case OPC_CheckChild6Type: case OPC_CheckChild7Type: + if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout(), + Opcode - OPC_CheckChild0Type)) + break; + continue; + case OPC_CheckCondCode: + if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckValueType: + if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI, + CurDAG->getDataLayout())) + break; + continue; + case OPC_CheckInteger: + if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; + continue; + case OPC_CheckChild0Integer: case OPC_CheckChild1Integer: + case OPC_CheckChild2Integer: case OPC_CheckChild3Integer: + case OPC_CheckChild4Integer: + if (!::CheckChildInteger(MatcherTable, MatcherIndex, N, + Opcode-OPC_CheckChild0Integer)) break; + continue; + case OPC_CheckAndImm: + if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + case OPC_CheckOrImm: + if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break; + continue; + + case OPC_CheckFoldableChainNode: { + assert(NodeStack.size() != 1 && "No parent node"); + // Verify that all intermediate nodes between the root and this one have + // a single use. + bool HasMultipleUses = false; + for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i) + if (!NodeStack[i].hasOneUse()) { + HasMultipleUses = true; + break; + } + if (HasMultipleUses) break; + + // Check to see that the target thinks this is profitable to fold and that + // we can fold it without inducing cycles in the graph. + if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch) || + !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(), + NodeToMatch, OptLevel, + true/*We validate our own chains*/)) + break; + + continue; + } + case OPC_EmitInteger: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + int64_t Val = MatcherTable[MatcherIndex++]; + if (Val & 128) + Val = GetVBR(Val, MatcherTable, MatcherIndex); + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getTargetConstant(Val, SDLoc(NodeToMatch), + VT), nullptr)); + continue; + } + case OPC_EmitRegister: { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), nullptr)); + continue; + } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair<SDValue, SDNode*>( + CurDAG->getRegister(RegNo, VT), nullptr)); + continue; + } + + case OPC_EmitConvertToTarget: { + // Convert from IMM/FPIMM to target version. 
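+      // That is, rebuild the recorded constant with isTarget set so it becomes
+      // a TargetConstant/TargetConstantFP that the selector will not try to
+      // match again.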
+ unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); + SDValue Imm = RecordedNodes[RecNo].first; + + if (Imm->getOpcode() == ISD::Constant) { + const ConstantInt *Val=cast<ConstantSDNode>(Imm)->getConstantIntValue(); + Imm = CurDAG->getConstant(*Val, SDLoc(NodeToMatch), Imm.getValueType(), + true); + } else if (Imm->getOpcode() == ISD::ConstantFP) { + const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue(); + Imm = CurDAG->getConstantFP(*Val, SDLoc(NodeToMatch), + Imm.getValueType(), true); + } + + RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); + continue; + } + + case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0 + case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1 + // These are space-optimized forms of OPC_EmitMergeInputChains. + assert(!InputChain.getNode() && + "EmitMergeInputChains should be the first chain producing node"); + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (!InputChain.getNode()) + break; // Failed to merge. + continue; + } + + case OPC_EmitMergeInputChains: { + assert(!InputChain.getNode() && + "EmitMergeInputChains should be the first chain producing node"); + // This node gets a list of nodes we matched in the input that have + // chains. We want to token factor all of the input chains to these nodes + // together. However, if any of the input chains is actually one of the + // nodes matched in this pattern, then we have an intra-match reference. + // Ignore these because the newly token factored chain should not refer to + // the old nodes. + unsigned NumChains = MatcherTable[MatcherIndex++]; + assert(NumChains != 0 && "Can't TF zero chains"); + + assert(ChainNodesMatched.empty() && + "Should only have one EmitMergeInputChains per match"); + + // Read all of the chained nodes. + for (unsigned i = 0; i != NumChains; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); + ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + + // FIXME: What if other value results of the node have uses not matched + // by this pattern? + if (ChainNodesMatched.back() != NodeToMatch && + !RecordedNodes[RecNo].first.hasOneUse()) { + ChainNodesMatched.clear(); + break; + } + } + + // If the inner loop broke out, the match fails. + if (ChainNodesMatched.empty()) + break; + + // Merge the input chains if they are not intra-pattern references. + InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG); + + if (!InputChain.getNode()) + break; // Failed to merge. 
+ + continue; + } + + case OPC_EmitCopyToReg: { + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg"); + unsigned DestPhysReg = MatcherTable[MatcherIndex++]; + + if (!InputChain.getNode()) + InputChain = CurDAG->getEntryNode(); + + InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch), + DestPhysReg, RecordedNodes[RecNo].first, + InputGlue); + + InputGlue = InputChain.getValue(1); + continue; + } + + case OPC_EmitNodeXForm: { + unsigned XFormNo = MatcherTable[MatcherIndex++]; + unsigned RecNo = MatcherTable[MatcherIndex++]; + assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm"); + SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo); + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, nullptr)); + continue; + } + + case OPC_EmitNode: + case OPC_MorphNodeTo: { + uint16_t TargetOpc = MatcherTable[MatcherIndex++]; + TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8; + unsigned EmitNodeInfo = MatcherTable[MatcherIndex++]; + // Get the result VT list. + unsigned NumVTs = MatcherTable[MatcherIndex++]; + SmallVector<EVT, 4> VTs; + for (unsigned i = 0; i != NumVTs; ++i) { + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + if (VT == MVT::iPTR) + VT = TLI->getPointerTy(CurDAG->getDataLayout()).SimpleTy; + VTs.push_back(VT); + } + + if (EmitNodeInfo & OPFL_Chain) + VTs.push_back(MVT::Other); + if (EmitNodeInfo & OPFL_GlueOutput) + VTs.push_back(MVT::Glue); + + // This is hot code, so optimize the two most common cases of 1 and 2 + // results. + SDVTList VTList; + if (VTs.size() == 1) + VTList = CurDAG->getVTList(VTs[0]); + else if (VTs.size() == 2) + VTList = CurDAG->getVTList(VTs[0], VTs[1]); + else + VTList = CurDAG->getVTList(VTs); + + // Get the operand list. + unsigned NumOps = MatcherTable[MatcherIndex++]; + SmallVector<SDValue, 8> Ops; + for (unsigned i = 0; i != NumOps; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid EmitNode"); + Ops.push_back(RecordedNodes[RecNo].first); + } + + // If there are variadic operands to add, handle them now. + if (EmitNodeInfo & OPFL_VariadicInfo) { + // Determine the start index to copy from. + unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo); + FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0; + assert(NodeToMatch->getNumOperands() >= FirstOpToCopy && + "Invalid variadic node"); + // Copy all of the variadic operands, not including a potential glue + // input. + for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands(); + i != e; ++i) { + SDValue V = NodeToMatch->getOperand(i); + if (V.getValueType() == MVT::Glue) break; + Ops.push_back(V); + } + } + + // If this has chain/glue inputs, add them. + if (EmitNodeInfo & OPFL_Chain) + Ops.push_back(InputChain); + if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != nullptr) + Ops.push_back(InputGlue); + + // Create the node. + SDNode *Res = nullptr; + if (Opcode != OPC_MorphNodeTo) { + // If this is a normal EmitNode command, just create the new node and + // add the results to the RecordedNodes list. + Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch), + VTList, Ops); + + // Add all the non-glue/non-chain results to the RecordedNodes list. 
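+        // (Chain and glue result types, if any, were appended last to VTs, so
+        //  stop at the first one.)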
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) { + if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break; + RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i), + nullptr)); + } + + } else if (NodeToMatch->getOpcode() != ISD::DELETED_NODE) { + Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops, EmitNodeInfo); + } else { + // NodeToMatch was eliminated by CSE when the target changed the DAG. + // We will visit the equivalent node later. + DEBUG(dbgs() << "Node was eliminated by CSE\n"); + return nullptr; + } + + // If the node had chain/glue results, update our notion of the current + // chain and glue. + if (EmitNodeInfo & OPFL_GlueOutput) { + InputGlue = SDValue(Res, VTs.size()-1); + if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-2); + } else if (EmitNodeInfo & OPFL_Chain) + InputChain = SDValue(Res, VTs.size()-1); + + // If the OPFL_MemRefs glue is set on this node, slap all of the + // accumulated memrefs onto it. + // + // FIXME: This is vastly incorrect for patterns with multiple outputs + // instructions that access memory and for ComplexPatterns that match + // loads. + if (EmitNodeInfo & OPFL_MemRefs) { + // Only attach load or store memory operands if the generated + // instruction may load or store. + const MCInstrDesc &MCID = TII->get(TargetOpc); + bool mayLoad = MCID.mayLoad(); + bool mayStore = MCID.mayStore(); + + unsigned NumMemRefs = 0; + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + ++NumMemRefs; + } else if ((*I)->isStore()) { + if (mayStore) + ++NumMemRefs; + } else { + ++NumMemRefs; + } + } + + MachineSDNode::mmo_iterator MemRefs = + MF->allocateMemRefsArray(NumMemRefs); + + MachineSDNode::mmo_iterator MemRefsPos = MemRefs; + for (SmallVectorImpl<MachineMemOperand *>::const_iterator I = + MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) { + if ((*I)->isLoad()) { + if (mayLoad) + *MemRefsPos++ = *I; + } else if ((*I)->isStore()) { + if (mayStore) + *MemRefsPos++ = *I; + } else { + *MemRefsPos++ = *I; + } + } + + cast<MachineSDNode>(Res) + ->setMemRefs(MemRefs, MemRefs + NumMemRefs); + } + + DEBUG(dbgs() << " " + << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created") + << " node: "; Res->dump(CurDAG); dbgs() << "\n"); + + // If this was a MorphNodeTo then we're completely done! + if (Opcode == OPC_MorphNodeTo) { + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, true); + return Res; + } + + continue; + } + + case OPC_MarkGlueResults: { + unsigned NumNodes = MatcherTable[MatcherIndex++]; + + // Read and remember all the glue-result nodes. + for (unsigned i = 0; i != NumNodes; ++i) { + unsigned RecNo = MatcherTable[MatcherIndex++]; + if (RecNo & 128) + RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex); + + assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults"); + GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); + } + continue; + } + + case OPC_CompleteMatch: { + // The match has been completed, and any new nodes (if any) have been + // created. Patch up references to the matched dag to use the newly + // created nodes. 
+ unsigned NumResults = MatcherTable[MatcherIndex++]; + + for (unsigned i = 0; i != NumResults; ++i) { + unsigned ResSlot = MatcherTable[MatcherIndex++]; + if (ResSlot & 128) + ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex); + + assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch"); + SDValue Res = RecordedNodes[ResSlot].first; + + assert(i < NodeToMatch->getNumValues() && + NodeToMatch->getValueType(i) != MVT::Other && + NodeToMatch->getValueType(i) != MVT::Glue && + "Invalid number of results to complete!"); + assert((NodeToMatch->getValueType(i) == Res.getValueType() || + NodeToMatch->getValueType(i) == MVT::iPTR || + Res.getValueType() == MVT::iPTR || + NodeToMatch->getValueType(i).getSizeInBits() == + Res.getValueType().getSizeInBits()) && + "invalid replacement"); + CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res); + } + + // If the root node defines glue, add it to the glue nodes to update list. + if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue) + GlueResultNodesMatched.push_back(NodeToMatch); + + // Update chain and glue uses. + UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched, + InputGlue, GlueResultNodesMatched, false); + + assert(NodeToMatch->use_empty() && + "Didn't replace all uses of the node?"); + + // FIXME: We just return here, which interacts correctly with SelectRoot + // above. We should fix this to not return an SDNode* anymore. + return nullptr; + } + } + + // If the code reached this point, then the match failed. See if there is + // another child to try in the current 'Scope', otherwise pop it until we + // find a case to check. + DEBUG(dbgs() << " Match failed at index " << CurrentOpcodeIndex << "\n"); + ++NumDAGIselRetries; + while (1) { + if (MatchScopes.empty()) { + CannotYetSelect(NodeToMatch); + return nullptr; + } + + // Restore the interpreter state back to the point where the scope was + // formed. + MatchScope &LastScope = MatchScopes.back(); + RecordedNodes.resize(LastScope.NumRecordedNodes); + NodeStack.clear(); + NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end()); + N = NodeStack.back(); + + if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size()) + MatchedMemRefs.resize(LastScope.NumMatchedMemRefs); + MatcherIndex = LastScope.FailIndex; + + DEBUG(dbgs() << " Continuing at " << MatcherIndex << "\n"); + + InputChain = LastScope.InputChain; + InputGlue = LastScope.InputGlue; + if (!LastScope.HasChainNodesMatched) + ChainNodesMatched.clear(); + if (!LastScope.HasGlueResultNodesMatched) + GlueResultNodesMatched.clear(); + + // Check to see what the offset is at the new MatcherIndex. If it is zero + // we have reached the end of this scope, otherwise we have another child + // in the current scope to try. + unsigned NumToSkip = MatcherTable[MatcherIndex++]; + if (NumToSkip & 128) + NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex); + + // If we have another child in this scope to match, update FailIndex and + // try it. + if (NumToSkip != 0) { + LastScope.FailIndex = MatcherIndex+NumToSkip; + break; + } + + // End of this scope, pop it and try the next child in the containing + // scope. 
+ MatchScopes.pop_back(); + } + } +} + + + +void SelectionDAGISel::CannotYetSelect(SDNode *N) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Cannot select: "; + + if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN && + N->getOpcode() != ISD::INTRINSIC_WO_CHAIN && + N->getOpcode() != ISD::INTRINSIC_VOID) { + N->printrFull(Msg, CurDAG); + Msg << "\nIn function: " << MF->getName(); + } else { + bool HasInputChain = N->getOperand(0).getValueType() == MVT::Other; + unsigned iid = + cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue(); + if (iid < Intrinsic::num_intrinsics) + Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid); + else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo()) + Msg << "target intrinsic %" << TII->getName(iid); + else + Msg << "unknown intrinsic #" << iid; + } + report_fatal_error(Msg.str()); +} + +char SelectionDAGISel::ID = 0; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp new file mode 100644 index 0000000..2764688 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -0,0 +1,307 @@ +//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the SelectionDAG::viewGraph method. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SelectionDAG.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "dag-printer" + +namespace llvm { + template<> + struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits { + + explicit DOTGraphTraits(bool isSimple=false) : + DefaultDOTGraphTraits(isSimple) {} + + static bool hasEdgeDestLabels() { + return true; + } + + static unsigned numEdgeDestLabels(const void *Node) { + return ((const SDNode *) Node)->getNumValues(); + } + + static std::string getEdgeDestLabel(const void *Node, unsigned i) { + return ((const SDNode *) Node)->getValueType(i).getEVTString(); + } + + template<typename EdgeIter> + static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) { + return itostr(I - SDNodeIterator::begin((const SDNode *) Node)); + } + + /// edgeTargetsEdgeSource - This method returns true if this outgoing edge + /// should actually target another edge source, not a node. If this method + /// is implemented, getEdgeTarget should be implemented. + template<typename EdgeIter> + static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { + return true; + } + + /// getEdgeTarget - If edgeTargetsEdgeSource returns true, this method is + /// called to determine which outgoing edge of Node is the target of this + /// edge. 
+ template<typename EdgeIter> + static EdgeIter getEdgeTarget(const void *Node, EdgeIter I) { + SDNode *TargetNode = *I; + SDNodeIterator NI = SDNodeIterator::begin(TargetNode); + std::advance(NI, I.getNode()->getOperand(I.getOperand()).getResNo()); + return NI; + } + + static std::string getGraphName(const SelectionDAG *G) { + return G->getMachineFunction().getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static std::string getNodeIdentifierLabel(const SDNode *Node, + const SelectionDAG *Graph) { + std::string R; + raw_string_ostream OS(R); +#ifndef NDEBUG + OS << 't' << Node->PersistentId; +#else + OS << static_cast<const void *>(Node); +#endif + return R; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. + template<typename EdgeIter> + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { + SDValue Op = EI.getNode()->getOperand(EI.getOperand()); + EVT VT = Op.getValueType(); + if (VT == MVT::Glue) + return "color=red,style=bold"; + else if (VT == MVT::Other) + return "color=blue,style=dashed"; + return ""; + } + + + static std::string getSimpleNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Result = Node->getOperationName(G); + { + raw_string_ostream OS(Result); + Node->print_details(OS, G); + } + return Result; + } + std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph); + static std::string getNodeAttributes(const SDNode *N, + const SelectionDAG *Graph) { +#ifndef NDEBUG + const std::string &Attrs = Graph->getGraphAttrs(N); + if (!Attrs.empty()) { + if (Attrs.find("shape=") == std::string::npos) + return std::string("shape=Mrecord,") + Attrs; + else + return Attrs; + } +#endif + return "shape=Mrecord"; + } + + static void addCustomGraphFeatures(SelectionDAG *G, + GraphWriter<SelectionDAG*> &GW) { + GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); + if (G->getRoot().getNode()) + GW.emitEdge(nullptr, -1, G->getRoot().getNode(), G->getRoot().getResNo(), + "color=blue,style=dashed"); + } + }; +} + +std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G); +} + + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void SelectionDAG::viewGraph(const std::string &Title) { +// This code is only for debugging! +#ifndef NDEBUG + ViewGraph(this, "dag." + getMachineFunction().getName(), + false, Title); +#else + errs() << "SelectionDAG::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +// This overload is defined out-of-line here instead of just using a +// default parameter because this is easiest for gdb to call. +void SelectionDAG::viewGraph() { + viewGraph(""); +} + +/// clearGraphAttrs - Clear all previously defined node graph attributes. +/// Intended to be used from a debugging tool (eg. gdb). +void SelectionDAG::clearGraphAttrs() { +#ifndef NDEBUG + NodeGraphAttrs.clear(); +#else + errs() << "SelectionDAG::clearGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// setGraphAttrs - Set graph attributes for a node. (eg. "color=red".) 
+/// +void SelectionDAG::setGraphAttrs(const SDNode *N, const char *Attrs) { +#ifndef NDEBUG + NodeGraphAttrs[N] = Attrs; +#else + errs() << "SelectionDAG::setGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + + +/// getGraphAttrs - Get graph attributes for a node. (eg. "color=red".) +/// Used from getNodeAttributes. +const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const { +#ifndef NDEBUG + std::map<const SDNode *, std::string>::const_iterator I = + NodeGraphAttrs.find(N); + + if (I != NodeGraphAttrs.end()) + return I->second; + else + return ""; +#else + errs() << "SelectionDAG::getGraphAttrs is only available in debug builds" + << " on systems with Graphviz or gv!\n"; + return std::string(); +#endif +} + +/// setGraphColor - Convenience for setting node color attribute. +/// +void SelectionDAG::setGraphColor(const SDNode *N, const char *Color) { +#ifndef NDEBUG + NodeGraphAttrs[N] = std::string("color=") + Color; +#else + errs() << "SelectionDAG::setGraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +/// setSubgraphColorHelper - Implement setSubgraphColor. Return +/// whether we truncated the search. +/// +bool SelectionDAG::setSubgraphColorHelper(SDNode *N, const char *Color, DenseSet<SDNode *> &visited, + int level, bool &printed) { + bool hit_limit = false; + +#ifndef NDEBUG + if (level >= 20) { + if (!printed) { + printed = true; + DEBUG(dbgs() << "setSubgraphColor hit max level\n"); + } + return true; + } + + unsigned oldSize = visited.size(); + visited.insert(N); + if (visited.size() != oldSize) { + setGraphColor(N, Color); + for(SDNodeIterator i = SDNodeIterator::begin(N), iend = SDNodeIterator::end(N); + i != iend; + ++i) { + hit_limit = setSubgraphColorHelper(*i, Color, visited, level+1, printed) || hit_limit; + } + } +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif + return hit_limit; +} + +/// setSubgraphColor - Convenience for setting subgraph color attribute. +/// +void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) { +#ifndef NDEBUG + DenseSet<SDNode *> visited; + bool printed = false; + if (setSubgraphColorHelper(N, Color, visited, 0, printed)) { + // Visually mark that we hit the limit + if (strcmp(Color, "red") == 0) { + setSubgraphColorHelper(N, "blue", visited, 0, printed); + } else if (strcmp(Color, "yellow") == 0) { + setSubgraphColorHelper(N, "green", visited, 0, printed); + } + } + +#else + errs() << "SelectionDAG::setSubgraphColor is only available in debug builds" + << " on systems with Graphviz or gv!\n"; +#endif +} + +std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { + std::string s; + raw_string_ostream O(s); + O << "SU(" << SU->NodeNum << "): "; + if (SU->getNode()) { + SmallVector<SDNode *, 4> GluedNodes; + for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) + GluedNodes.push_back(N); + while (!GluedNodes.empty()) { + O << DOTGraphTraits<SelectionDAG*> + ::getSimpleNodeLabel(GluedNodes.back(), DAG); + GluedNodes.pop_back(); + if (!GluedNodes.empty()) + O << "\n "; + } + } else { + O << "CROSS RC COPY"; + } + return O.str(); +} + +void ScheduleDAGSDNodes::getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const { + if (DAG) { + // Draw a special "GraphRoot" node to indicate the root of the graph. 
+ GW.emitSimpleNode(nullptr, "plaintext=circle", "GraphRoot"); + const SDNode *N = DAG->getRoot().getNode(); + if (N && N->getNodeId() != -1) + GW.emitEdge(nullptr, -1, &SUnits[N->getNodeId()], -1, + "color=blue,style=dashed"); + } +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp new file mode 100644 index 0000000..6547a62 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -0,0 +1,906 @@ +//===-- StatepointLowering.cpp - SDAGBuilder's statepoint code -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes support code use by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#include "StatepointLowering.h" +#include "SelectionDAGBuilder.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Statepoint.h" +#include "llvm/Target/TargetLowering.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "statepoint-lowering" + +STATISTIC(NumSlotsAllocatedForStatepoints, + "Number of stack slots allocated for statepoints"); +STATISTIC(NumOfStatepoints, "Number of statepoint nodes encountered"); +STATISTIC(StatepointMaxSlotsRequired, + "Maximum number of stack slots required for a singe statepoint"); + +static void pushStackMapConstant(SmallVectorImpl<SDValue>& Ops, + SelectionDAGBuilder &Builder, uint64_t Value) { + SDLoc L = Builder.getCurSDLoc(); + Ops.push_back(Builder.DAG.getTargetConstant(StackMaps::ConstantOp, L, + MVT::i64)); + Ops.push_back(Builder.DAG.getTargetConstant(Value, L, MVT::i64)); +} + +void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) { + // Consistency check + assert(PendingGCRelocateCalls.empty() && + "Trying to visit statepoint before finished processing previous one"); + Locations.clear(); + NextSlotToAllocate = 0; + // Need to resize this on each safepoint - we need the two to stay in + // sync and the clear patterns of a SelectionDAGBuilder have no relation + // to FunctionLoweringInfo. + AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size()); + for (size_t i = 0; i < AllocatedStackSlots.size(); i++) { + AllocatedStackSlots[i] = false; + } +} + +void StatepointLoweringState::clear() { + Locations.clear(); + AllocatedStackSlots.clear(); + assert(PendingGCRelocateCalls.empty() && + "cleared before statepoint sequence completed"); +} + +SDValue +StatepointLoweringState::allocateStackSlot(EVT ValueType, + SelectionDAGBuilder &Builder) { + + NumSlotsAllocatedForStatepoints++; + + // The basic scheme here is to first look for a previously created stack slot + // which is not in use (accounting for the fact arbitrary slots may already + // be reserved), or to create a new stack slot and use it. 
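+  // For example, if an earlier statepoint in this function created spill
+  // slots fi#3 and fi#7 and only fi#3 is reserved for the current statepoint,
+  // the scan below hands out fi#7 before falling back to a fresh stack
+  // temporary. (The frame indices are illustrative only.)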
+ + // If this doesn't succeed in 40000 iterations, something is seriously wrong + for (int i = 0; i < 40000; i++) { + assert(Builder.FuncInfo.StatepointStackSlots.size() == + AllocatedStackSlots.size() && + "broken invariant"); + const size_t NumSlots = AllocatedStackSlots.size(); + assert(NextSlotToAllocate <= NumSlots && "broken invariant"); + + if (NextSlotToAllocate >= NumSlots) { + assert(NextSlotToAllocate == NumSlots); + // record stats + if (NumSlots + 1 > StatepointMaxSlotsRequired) { + StatepointMaxSlotsRequired = NumSlots + 1; + } + + SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType); + const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); + auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo(); + MFI->markAsStatepointSpillSlotObjectIndex(FI); + + Builder.FuncInfo.StatepointStackSlots.push_back(FI); + AllocatedStackSlots.push_back(true); + return SpillSlot; + } + if (!AllocatedStackSlots[NextSlotToAllocate]) { + const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate]; + AllocatedStackSlots[NextSlotToAllocate] = true; + return Builder.DAG.getFrameIndex(FI, ValueType); + } + // Note: We deliberately choose to advance this only on the failing path. + // Doing so on the succeeding path involves a bit of complexity that caused + // a minor bug previously. Unless performance shows this matters, please + // keep this code as simple as possible. + NextSlotToAllocate++; + } + llvm_unreachable("infinite loop?"); +} + +/// Utility function for reservePreviousStackSlotForValue. Tries to find +/// stack slot index to which we have spilled value for previous statepoints. +/// LookUpDepth specifies maximum DFS depth this function is allowed to look. +static Optional<int> findPreviousSpillSlot(const Value *Val, + SelectionDAGBuilder &Builder, + int LookUpDepth) { + // Can not look any further - give up now + if (LookUpDepth <= 0) + return Optional<int>(); + + // Spill location is known for gc relocates + if (const auto *Relocate = dyn_cast<GCRelocateInst>(Val)) { + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + Builder.FuncInfo.StatepointRelocatedValues[Relocate->getStatepoint()]; + + auto It = SpillMap.find(Relocate->getDerivedPtr()); + if (It == SpillMap.end()) + return Optional<int>(); + + return It->second; + } + + // Look through bitcast instructions. + if (const BitCastInst *Cast = dyn_cast<BitCastInst>(Val)) { + return findPreviousSpillSlot(Cast->getOperand(0), Builder, LookUpDepth - 1); + } + + // Look through phi nodes + // All incoming values should have same known stack slot, otherwise result + // is unknown. + if (const PHINode *Phi = dyn_cast<PHINode>(Val)) { + Optional<int> MergedResult = None; + + for (auto &IncomingValue : Phi->incoming_values()) { + Optional<int> SpillSlot = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth - 1); + if (!SpillSlot.hasValue()) + return Optional<int>(); + + if (MergedResult.hasValue() && *MergedResult != *SpillSlot) + return Optional<int>(); + + MergedResult = SpillSlot; + } + return MergedResult; + } + + // TODO: We can do better for PHI nodes. In cases like this: + // ptr = phi(relocated_pointer, not_relocated_pointer) + // statepoint(ptr) + // We will return that stack slot for ptr is unknown. And later we might + // assign different stack slots for ptr and relocated_pointer. This limits + // llvm's ability to remove redundant stores. + // Unfortunately it's hard to accomplish in current infrastructure. 
+ // We use this function to eliminate spill store completely, while + // in example we still need to emit store, but instead of any location + // we need to use special "preferred" location. + + // TODO: handle simple updates. If a value is modified and the original + // value is no longer live, it would be nice to put the modified value in the + // same slot. This allows folding of the memory accesses for some + // instructions types (like an increment). + // statepoint (i) + // i1 = i+1 + // statepoint (i1) + // However we need to be careful for cases like this: + // statepoint(i) + // i1 = i+1 + // statepoint(i, i1) + // Here we want to reserve spill slot for 'i', but not for 'i+1'. If we just + // put handling of simple modifications in this function like it's done + // for bitcasts we might end up reserving i's slot for 'i+1' because order in + // which we visit values is unspecified. + + // Don't know any information about this instruction + return Optional<int>(); +} + +/// Try to find existing copies of the incoming values in stack slots used for +/// statepoint spilling. If we can find a spill slot for the incoming value, +/// mark that slot as allocated, and reuse the same slot for this safepoint. +/// This helps to avoid series of loads and stores that only serve to reshuffle +/// values on the stack between calls. +static void reservePreviousStackSlotForValue(const Value *IncomingValue, + SelectionDAGBuilder &Builder) { + + SDValue Incoming = Builder.getValue(IncomingValue); + + if (isa<ConstantSDNode>(Incoming) || isa<FrameIndexSDNode>(Incoming)) { + // We won't need to spill this, so no need to check for previously + // allocated stack slots + return; + } + + SDValue OldLocation = Builder.StatepointLowering.getLocation(Incoming); + if (OldLocation.getNode()) + // duplicates in input + return; + + const int LookUpDepth = 6; + Optional<int> Index = + findPreviousSpillSlot(IncomingValue, Builder, LookUpDepth); + if (!Index.hasValue()) + return; + + auto Itr = std::find(Builder.FuncInfo.StatepointStackSlots.begin(), + Builder.FuncInfo.StatepointStackSlots.end(), *Index); + assert(Itr != Builder.FuncInfo.StatepointStackSlots.end() && + "value spilled to the unknown stack slot"); + + // This is one of our dedicated lowering slots + const int Offset = + std::distance(Builder.FuncInfo.StatepointStackSlots.begin(), Itr); + if (Builder.StatepointLowering.isStackSlotAllocated(Offset)) { + // stack slot already assigned to someone else, can't use it! + // TODO: currently we reserve space for gc arguments after doing + // normal allocation for deopt arguments. We should reserve for + // _all_ deopt and gc arguments, then start allocating. This + // will prevent some moves being inserted when vm state changes, + // but gc state doesn't between two calls. + return; + } + // Reserve this stack slot + Builder.StatepointLowering.reserveStackSlot(Offset); + + // Cache this slot so we find it when going through the normal + // assignment loop. + SDValue Loc = Builder.DAG.getTargetFrameIndex(*Index, Incoming.getValueType()); + Builder.StatepointLowering.setLocation(Incoming, Loc); +} + +/// Remove any duplicate (as SDValues) from the derived pointer pairs. This +/// is not required for correctness. It's purpose is to reduce the size of +/// StackMap section. It has no effect on the number of spill slots required +/// or the actual lowering. 
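+/// Duplicates are detected on the lowered SDValues, so distinct IR values
+/// that map to the same node are collapsed into a single entry as well.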
+static void removeDuplicatesGCPtrs(SmallVectorImpl<const Value *> &Bases, + SmallVectorImpl<const Value *> &Ptrs, + SmallVectorImpl<const Value *> &Relocs, + SelectionDAGBuilder &Builder) { + + // This is horribly inefficient, but I don't care right now + SmallSet<SDValue, 64> Seen; + + SmallVector<const Value *, 64> NewBases, NewPtrs, NewRelocs; + for (size_t i = 0; i < Ptrs.size(); i++) { + SDValue SD = Builder.getValue(Ptrs[i]); + // Only add non-duplicates + if (Seen.count(SD) == 0) { + NewBases.push_back(Bases[i]); + NewPtrs.push_back(Ptrs[i]); + NewRelocs.push_back(Relocs[i]); + } + Seen.insert(SD); + } + assert(Bases.size() >= NewBases.size()); + assert(Ptrs.size() >= NewPtrs.size()); + assert(Relocs.size() >= NewRelocs.size()); + Bases = NewBases; + Ptrs = NewPtrs; + Relocs = NewRelocs; + assert(Ptrs.size() == Bases.size()); + assert(Ptrs.size() == Relocs.size()); +} + +/// Extract call from statepoint, lower it and return pointer to the +/// call node. Also update NodeMap so that getValue(statepoint) will +/// reference lowered call result +static SDNode * +lowerCallFromStatepoint(ImmutableStatepoint ISP, const BasicBlock *EHPadBB, + SelectionDAGBuilder &Builder, + SmallVectorImpl<SDValue> &PendingExports) { + + ImmutableCallSite CS(ISP.getCallSite()); + + SDValue ActualCallee; + + if (ISP.getNumPatchBytes() > 0) { + // If we've been asked to emit a nop sequence instead of a call instruction + // for this statepoint then don't lower the call target, but use a constant + // `null` instead. Not lowering the call target lets statepoint clients get + // away without providing a physical address for the symbolic call target at + // link time. + + const auto &TLI = Builder.DAG.getTargetLoweringInfo(); + const auto &DL = Builder.DAG.getDataLayout(); + + unsigned AS = ISP.getCalledValue()->getType()->getPointerAddressSpace(); + ActualCallee = Builder.DAG.getConstant(0, Builder.getCurSDLoc(), + TLI.getPointerTy(DL, AS)); + } else + ActualCallee = Builder.getValue(ISP.getCalledValue()); + + assert(CS.getCallingConv() != CallingConv::AnyReg && + "anyregcc is not supported on statepoints!"); + + Type *DefTy = ISP.getActualReturnType(); + bool HasDef = !DefTy->isVoidTy(); + + SDValue ReturnValue, CallEndVal; + std::tie(ReturnValue, CallEndVal) = Builder.lowerCallOperands( + ISP.getCallSite(), ImmutableStatepoint::CallArgsBeginPos, + ISP.getNumCallArgs(), ActualCallee, DefTy, EHPadBB, + false /* IsPatchPoint */); + + SDNode *CallEnd = CallEndVal.getNode(); + + // Get a call instruction from the call sequence chain. Tail calls are not + // allowed. The following code is essentially reverse engineering X86's + // LowerCallTo. + // + // We are expecting DAG to have the following form: + // + // ch = eh_label (only in case of invoke statepoint) + // ch, glue = callseq_start ch + // ch, glue = X86::Call ch, glue + // ch, glue = callseq_end ch, glue + // get_return_value ch, glue + // + // get_return_value can either be a sequence of CopyFromReg instructions + // to grab the return value from the return register(s), or it can be a LOAD + // to load a value returned by reference via a stack slot. 
+
+  if (HasDef) {
+    if (CallEnd->getOpcode() == ISD::LOAD)
+      CallEnd = CallEnd->getOperand(0).getNode();
+    else
+      while (CallEnd->getOpcode() == ISD::CopyFromReg)
+        CallEnd = CallEnd->getOperand(0).getNode();
+  }
+
+  assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && "expected!");
+
+  // Export the result value if needed
+  const Instruction *GCResult = ISP.getGCResult();
+  if (HasDef && GCResult) {
+    if (GCResult->getParent() != CS.getParent()) {
+      // Result value will be used in a different basic block so we need to
+      // export it now.
+      // The default exporting mechanism will not work here because the
+      // statepoint call has a different type than the actual call. This means
+      // that by default llvm will create an export register of the wrong type
+      // (always i32 in our case). So instead we need to create an export
+      // register with the correct type manually.
+      // TODO: To eliminate this problem we can remove gc.result intrinsics
+      //       completely and make the statepoint call return a tuple.
+      unsigned Reg = Builder.FuncInfo.CreateRegs(ISP.getActualReturnType());
+      RegsForValue RFV(
+          *Builder.DAG.getContext(), Builder.DAG.getTargetLoweringInfo(),
+          Builder.DAG.getDataLayout(), Reg, ISP.getActualReturnType());
+      SDValue Chain = Builder.DAG.getEntryNode();
+
+      RFV.getCopyToRegs(ReturnValue, Builder.DAG, Builder.getCurSDLoc(), Chain,
+                        nullptr);
+      PendingExports.push_back(Chain);
+      Builder.FuncInfo.ValueMap[CS.getInstruction()] = Reg;
+    } else {
+      // Result value will be used in the same basic block. Don't export it or
+      // perform any explicit register copies.
+      // We'll replace the actual call node shortly. gc_result will grab
+      // this value.
+      Builder.setValue(CS.getInstruction(), ReturnValue);
+    }
+  } else {
+    // The token value is never used from here on, just generate a poison value
+    Builder.setValue(CS.getInstruction(),
+                     Builder.DAG.getIntPtrConstant(-1, Builder.getCurSDLoc()));
+  }
+
+  return CallEnd->getOperand(0).getNode();
+}
+
+/// Collect all gc pointers coming into the statepoint intrinsic, clean them
+/// up, and return three arrays:
+///   Bases - base pointers incoming to this statepoint
+///   Ptrs - derived pointers incoming to this statepoint
+///   Relocs - the gc_relocate corresponding to each base/ptr pair
+/// Elements of these arrays are in one-to-one correspondence with each
+/// other, i.e. Bases[i] and Ptrs[i] come from the same gc_relocate call.
+static void getIncomingStatepointGCValues(
+    SmallVectorImpl<const Value *> &Bases, SmallVectorImpl<const Value *> &Ptrs,
+    SmallVectorImpl<const Value *> &Relocs, ImmutableStatepoint StatepointSite,
+    SelectionDAGBuilder &Builder) {
+  for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
+    Relocs.push_back(Relocate);
+    Bases.push_back(Relocate->getBasePtr());
+    Ptrs.push_back(Relocate->getDerivedPtr());
+  }
+
+  // Remove any redundant llvm::Values which map to the same SDValue as another
+  // input. Also has the effect of removing duplicates in the original
+  // llvm::Value input list as well. This is a useful optimization for
+  // reducing the size of the StackMap section. It has no other impact.
+  removeDuplicatesGCPtrs(Bases, Ptrs, Relocs, Builder);
+
+  assert(Bases.size() == Ptrs.size() && Ptrs.size() == Relocs.size());
+}
+
+/// Spill a value incoming to the statepoint. It might be either part of the
+/// vmstate or the gcstate. In both cases unconditionally spill it on the stack
+/// unless it is a null constant.
Return pair with first element being frame index +/// containing saved value and second element with outgoing chain from the +/// emitted store +static std::pair<SDValue, SDValue> +spillIncomingStatepointValue(SDValue Incoming, SDValue Chain, + SelectionDAGBuilder &Builder) { + SDValue Loc = Builder.StatepointLowering.getLocation(Incoming); + + // Emit new store if we didn't do it for this ptr before + if (!Loc.getNode()) { + Loc = Builder.StatepointLowering.allocateStackSlot(Incoming.getValueType(), + Builder); + assert(isa<FrameIndexSDNode>(Loc)); + int Index = cast<FrameIndexSDNode>(Loc)->getIndex(); + // We use TargetFrameIndex so that isel will not select it into LEA + Loc = Builder.DAG.getTargetFrameIndex(Index, Incoming.getValueType()); + + // TODO: We can create TokenFactor node instead of + // chaining stores one after another, this may allow + // a bit more optimal scheduling for them + Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc, + MachinePointerInfo::getFixedStack( + Builder.DAG.getMachineFunction(), Index), + false, false, 0); + + Builder.StatepointLowering.setLocation(Incoming, Loc); + } + + assert(Loc.getNode()); + return std::make_pair(Loc, Chain); +} + +/// Lower a single value incoming to a statepoint node. This value can be +/// either a deopt value or a gc value, the handling is the same. We special +/// case constants and allocas, then fall back to spilling if required. +static void lowerIncomingStatepointValue(SDValue Incoming, + SmallVectorImpl<SDValue> &Ops, + SelectionDAGBuilder &Builder) { + SDValue Chain = Builder.getRoot(); + + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) { + // If the original value was a constant, make sure it gets recorded as + // such in the stackmap. This is required so that the consumer can + // parse any internal format to the deopt state. It also handles null + // pointers and other constant pointers in GC states + pushStackMapConstant(Ops, Builder, C->getSExtValue()); + } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) { + // This handles allocas as arguments to the statepoint (this is only + // really meaningful for a deopt value. For GC, we'd be trying to + // relocate the address of the alloca itself?) + Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(), + Incoming.getValueType())); + } else { + // Otherwise, locate a spill slot and explicitly spill it so it + // can be found by the runtime later. We currently do not support + // tracking values through callee saved registers to their eventual + // spill location. This would be a useful optimization, but would + // need to be optional since it requires a lot of complexity on the + // runtime side which not all would support. + std::pair<SDValue, SDValue> Res = + spillIncomingStatepointValue(Incoming, Chain, Builder); + Ops.push_back(Res.first); + Chain = Res.second; + } + + Builder.DAG.setRoot(Chain); +} + +/// Lower deopt state and gc pointer arguments of the statepoint. The actual +/// lowering is described in lowerIncomingStatepointValue. This function is +/// responsible for lowering everything in the right position and playing some +/// tricks to avoid redundant stack manipulation where possible. On +/// completion, 'Ops' will contain ready to use operands for machine code +/// statepoint. The chain nodes will have already been created and the DAG root +/// will be set to the last value spilled (if any were). 
+static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops, + ImmutableStatepoint StatepointSite, + SelectionDAGBuilder &Builder) { + + // Lower the deopt and gc arguments for this statepoint. Layout will + // be: deopt argument length, deopt arguments.., gc arguments... + + SmallVector<const Value *, 64> Bases, Ptrs, Relocations; + getIncomingStatepointGCValues(Bases, Ptrs, Relocations, StatepointSite, + Builder); + +#ifndef NDEBUG + // Check that each of the gc pointer and bases we've gotten out of the + // safepoint is something the strategy thinks might be a pointer into the GC + // heap. This is basically just here to help catch errors during statepoint + // insertion. TODO: This should actually be in the Verifier, but we can't get + // to the GCStrategy from there (yet). + GCStrategy &S = Builder.GFI->getStrategy(); + for (const Value *V : Bases) { + auto Opt = S.isGCManagedPointer(V->getType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed base pointer found in statepoint"); + } + } + for (const Value *V : Ptrs) { + auto Opt = S.isGCManagedPointer(V->getType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && + "non gc managed derived pointer found in statepoint"); + } + } + for (const Value *V : Relocations) { + auto Opt = S.isGCManagedPointer(V->getType()); + if (Opt.hasValue()) { + assert(Opt.getValue() && "non gc managed pointer relocated"); + } + } +#endif + + // Before we actually start lowering (and allocating spill slots for values), + // reserve any stack slots which we judge to be profitable to reuse for a + // particular value. This is purely an optimization over the code below and + // doesn't change semantics at all. It is important for performance that we + // reserve slots for both deopt and gc values before lowering either. + for (const Value *V : StatepointSite.vm_state_args()) { + reservePreviousStackSlotForValue(V, Builder); + } + for (unsigned i = 0; i < Bases.size(); ++i) { + reservePreviousStackSlotForValue(Bases[i], Builder); + reservePreviousStackSlotForValue(Ptrs[i], Builder); + } + + // First, prefix the list with the number of unique values to be + // lowered. Note that this is the number of *Values* not the + // number of SDValues required to lower them. + const int NumVMSArgs = StatepointSite.getNumTotalVMSArgs(); + pushStackMapConstant(Ops, Builder, NumVMSArgs); + + assert(NumVMSArgs == std::distance(StatepointSite.vm_state_begin(), + StatepointSite.vm_state_end())); + + // The vm state arguments are lowered in an opaque manner. We do + // not know what type of values are contained within. We skip the + // first one since that happens to be the total number we lowered + // explicitly just above. We could have left it in the loop and + // not done it explicitly, but it's far easier to understand this + // way. + for (const Value *V : StatepointSite.vm_state_args()) { + SDValue Incoming = Builder.getValue(V); + lowerIncomingStatepointValue(Incoming, Ops, Builder); + } + + // Finally, go ahead and lower all the gc arguments. There's no prefixed + // length for this one. After lowering, we'll have the base and pointer + // arrays interwoven with each (lowered) base pointer immediately followed by + // it's (lowered) derived pointer. i.e + // (base[0], ptr[0], base[1], ptr[1], ...) 
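+  //
+  // Illustrative example: with deopt values d0, d1 and gc pairs (b0, p0),
+  // (b1, p1), the meta arguments end up ordered roughly as
+  //   <2>, d0, d1, b0, p0, b1, p1
+  // where <2> is the deopt-argument-count constant pushed above and each
+  // entry may lower to one or more operands (constant record, frame index,
+  // or spill slot).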
+  for (unsigned i = 0; i < Bases.size(); ++i) {
+    const Value *Base = Bases[i];
+    lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+
+    const Value *Ptr = Ptrs[i];
+    lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+  }
+
+  // If there are any explicit spill slots passed to the statepoint, record
+  // them, but otherwise do not do anything special. These are user provided
+  // allocas and give control over placement to the consumer. In this case,
+  // it is the contents of the slot which may get updated, not the pointer to
+  // the alloca
+  for (Value *V : StatepointSite.gc_args()) {
+    SDValue Incoming = Builder.getValue(V);
+    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
+      // This handles allocas as arguments to the statepoint
+      Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
+                                                    Incoming.getValueType()));
+    }
+  }
+
+  // Record computed locations for all lowered values.
+  // This cannot be embedded in the lowering loops, as we need to record *all*
+  // values, while the previous loops account only for values with unique
+  // SDValues.
+  const Instruction *StatepointInstr =
+      StatepointSite.getCallSite().getInstruction();
+  FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap =
+      Builder.FuncInfo.StatepointRelocatedValues[StatepointInstr];
+
+  for (const GCRelocateInst *Relocate : StatepointSite.getRelocates()) {
+    const Value *V = Relocate->getDerivedPtr();
+    SDValue SDV = Builder.getValue(V);
+    SDValue Loc = Builder.StatepointLowering.getLocation(SDV);
+
+    if (Loc.getNode()) {
+      SpillMap[V] = cast<FrameIndexSDNode>(Loc)->getIndex();
+    } else {
+      // Record the value as visited, but not spilled. This is the case for
+      // allocas and constants. For these values we can avoid emitting a spill
+      // load while visiting the corresponding gc_relocate.
+      // Actually we do not need to record them in this map at all.
+      // We do this only to check that we are not relocating any unvisited
+      // value.
+      SpillMap[V] = None;
+
+      // The default llvm mechanism for exporting values which are used in
+      // different basic blocks does not work for gc relocates.
+      // Note that it would be incorrect to teach llvm that all relocates are
+      // uses of the corresponding values so that it would automatically
+      // export them. Relocates of spilled values do not use the original
+      // value.
+      if (Relocate->getParent() != StatepointInstr->getParent())
+        Builder.ExportFromCurrentBlock(V);
+    }
+  }
+}
+
+void SelectionDAGBuilder::visitStatepoint(const CallInst &CI) {
+  // Check some preconditions for sanity
+  assert(isStatepoint(&CI) &&
+         "function called must be the statepoint function");
+
+  LowerStatepoint(ImmutableStatepoint(&CI));
+}
+
+void SelectionDAGBuilder::LowerStatepoint(
+    ImmutableStatepoint ISP, const BasicBlock *EHPadBB /*= nullptr*/) {
+  // The basic scheme here is that information about both the original call and
+  // the safepoint is encoded in the CallInst. We create a temporary call and
+  // lower it, then reverse engineer the calling sequence.
+
+  NumOfStatepoints++;
+  // Clear state
+  StatepointLowering.startNewStatepoint(*this);
+
+  ImmutableCallSite CS(ISP.getCallSite());
+
+#ifndef NDEBUG
+  // Consistency check. Check only relocates in the same basic block as their
+  // statepoint.
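+  // Schematically (illustrative IR, details elided):
+  //   %tok = call token @llvm.experimental.gc.statepoint...(...)
+  //   %p.relocated = call i8 addrspace(1)*
+  //                      @llvm.experimental.gc.relocate...(token %tok, ...)
+  // Only relocates living in the same block as %tok get scheduled here.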
+ for (const User *U : CS->users()) { + const CallInst *Call = cast<CallInst>(U); + if (isa<GCRelocateInst>(Call) && Call->getParent() == CS.getParent()) + StatepointLowering.scheduleRelocCall(*Call); + } +#endif + +#ifndef NDEBUG + // If this is a malformed statepoint, report it early to simplify debugging. + // This should catch any IR level mistake that's made when constructing or + // transforming statepoints. + ISP.verify(); + + // Check that the associated GCStrategy expects to encounter statepoints. + assert(GFI->getStrategy().useStatepoints() && + "GCStrategy does not expect to encounter statepoints"); +#endif + + // Lower statepoint vmstate and gcstate arguments + SmallVector<SDValue, 10> LoweredMetaArgs; + lowerStatepointMetaArgs(LoweredMetaArgs, ISP, *this); + + // Get call node, we will replace it later with statepoint + SDNode *CallNode = + lowerCallFromStatepoint(ISP, EHPadBB, *this, PendingExports); + + // Construct the actual GC_TRANSITION_START, STATEPOINT, and GC_TRANSITION_END + // nodes with all the appropriate arguments and return values. + + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + SDValue Chain = CallNode->getOperand(0); + + SDValue Glue; + bool CallHasIncomingGlue = CallNode->getGluedNode(); + if (CallHasIncomingGlue) { + // Glue is always last operand + Glue = CallNode->getOperand(CallNode->getNumOperands() - 1); + } + + // Build the GC_TRANSITION_START node if necessary. + // + // The operands to the GC_TRANSITION_{START,END} nodes are laid out in the + // order in which they appear in the call to the statepoint intrinsic. If + // any of the operands is a pointer-typed, that operand is immediately + // followed by a SRCVALUE for the pointer that may be used during lowering + // (e.g. to form MachinePointerInfo values for loads/stores). + const bool IsGCTransition = + (ISP.getFlags() & (uint64_t)StatepointFlags::GCTransition) == + (uint64_t)StatepointFlags::GCTransition; + if (IsGCTransition) { + SmallVector<SDValue, 8> TSOps; + + // Add chain + TSOps.push_back(Chain); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TSOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TSOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue if necessary + if (CallHasIncomingGlue) + TSOps.push_back(Glue); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_START, getCurSDLoc(), NodeTys, TSOps); + + Chain = GCTransitionStart.getValue(0); + Glue = GCTransitionStart.getValue(1); + } + + // TODO: Currently, all of these operands are being marked as read/write in + // PrologEpilougeInserter.cpp, we should special case the VMState arguments + // and flags to be read-only. + SmallVector<SDValue, 40> Ops; + + // Add the <id> and <numBytes> constants. + Ops.push_back(DAG.getTargetConstant(ISP.getID(), getCurSDLoc(), MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(ISP.getNumPatchBytes(), getCurSDLoc(), MVT::i32)); + + // Calculate and push starting position of vmstate arguments + // Get number of arguments incoming directly into call node + unsigned NumCallRegArgs = + CallNode->getNumOperands() - (CallHasIncomingGlue ? 
4 : 3); + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, getCurSDLoc(), MVT::i32)); + + // Add call target + SDValue CallTarget = SDValue(CallNode->getOperand(1).getNode(), 0); + Ops.push_back(CallTarget); + + // Add call arguments + // Get position of register mask in the call + SDNode::op_iterator RegMaskIt; + if (CallHasIncomingGlue) + RegMaskIt = CallNode->op_end() - 2; + else + RegMaskIt = CallNode->op_end() - 1; + Ops.insert(Ops.end(), CallNode->op_begin() + 2, RegMaskIt); + + // Add a constant argument for the calling convention + pushStackMapConstant(Ops, *this, CS.getCallingConv()); + + // Add a constant argument for the flags + uint64_t Flags = ISP.getFlags(); + assert( + ((Flags & ~(uint64_t)StatepointFlags::MaskAll) == 0) + && "unknown flag used"); + pushStackMapConstant(Ops, *this, Flags); + + // Insert all vmstate and gcstate arguments + Ops.insert(Ops.end(), LoweredMetaArgs.begin(), LoweredMetaArgs.end()); + + // Add register mask from call node + Ops.push_back(*RegMaskIt); + + // Add chain + Ops.push_back(Chain); + + // Same for the glue, but we add it only if original call had it + if (Glue.getNode()) + Ops.push_back(Glue); + + // Compute return values. Provide a glue output since we consume one as + // input. This allows someone else to chain off us as needed. + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDNode *StatepointMCNode = + DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops); + + SDNode *SinkNode = StatepointMCNode; + + // Build the GC_TRANSITION_END node if necessary. + // + // See the comment above regarding GC_TRANSITION_START for the layout of + // the operands to the GC_TRANSITION_END node. + if (IsGCTransition) { + SmallVector<SDValue, 8> TEOps; + + // Add chain + TEOps.push_back(SDValue(StatepointMCNode, 0)); + + // Add GC transition arguments + for (const Value *V : ISP.gc_transition_args()) { + TEOps.push_back(getValue(V)); + if (V->getType()->isPointerTy()) + TEOps.push_back(DAG.getSrcValue(V)); + } + + // Add glue + TEOps.push_back(SDValue(StatepointMCNode, 1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + SDValue GCTransitionStart = + DAG.getNode(ISD::GC_TRANSITION_END, getCurSDLoc(), NodeTys, TEOps); + + SinkNode = GCTransitionStart.getNode(); + } + + // Replace original call + DAG.ReplaceAllUsesWith(CallNode, SinkNode); // This may update Root + // Remove original call node + DAG.DeleteNode(CallNode); + + // DON'T set the root - under the assumption that it's already set past the + // inserted node we created. + + // TODO: A better future implementation would be to emit a single variable + // argument, variable return value STATEPOINT node here and then hookup the + // return value of each gc.relocate to the respective output of the + // previously emitted STATEPOINT value. Unfortunately, this doesn't appear + // to actually be possible today. +} + +void SelectionDAGBuilder::visitGCResult(const CallInst &CI) { + // The result value of the gc_result is simply the result of the actual + // call. We've already emitted this, so just grab the value. + Instruction *I = cast<Instruction>(CI.getArgOperand(0)); + assert(isStatepoint(I) && "first argument must be a statepoint token"); + + if (I->getParent() != CI.getParent()) { + // Statepoint is in different basic block so we should have stored call + // result in a virtual register. 
+ // We can not use default getValue() functionality to copy value from this + // register because statepoint and actuall call return types can be + // different, and getValue() will use CopyFromReg of the wrong type, + // which is always i32 in our case. + PointerType *CalleeType = cast<PointerType>( + ImmutableStatepoint(I).getCalledValue()->getType()); + Type *RetTy = + cast<FunctionType>(CalleeType->getElementType())->getReturnType(); + SDValue CopyFromReg = getCopyFromRegs(I, RetTy); + + assert(CopyFromReg.getNode()); + setValue(&CI, CopyFromReg); + } else { + setValue(&CI, getValue(I)); + } +} + +void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { +#ifndef NDEBUG + // Consistency check + // We skip this check for relocates not in the same basic block as thier + // statepoint. It would be too expensive to preserve validation info through + // different basic blocks. + if (Relocate.getStatepoint()->getParent() == Relocate.getParent()) { + StatepointLowering.relocCallVisited(Relocate); + } +#endif + + const Value *DerivedPtr = Relocate.getDerivedPtr(); + SDValue SD = getValue(DerivedPtr); + + FunctionLoweringInfo::StatepointSpilledValueMapTy &SpillMap = + FuncInfo.StatepointRelocatedValues[Relocate.getStatepoint()]; + + // We should have recorded location for this pointer + assert(SpillMap.count(DerivedPtr) && "Relocating not lowered gc value"); + Optional<int> DerivedPtrLocation = SpillMap[DerivedPtr]; + + // We didn't need to spill these special cases (constants and allocas). + // See the handling in spillIncomingValueForStatepoint for detail. + if (!DerivedPtrLocation) { + setValue(&Relocate, SD); + return; + } + + SDValue SpillSlot = DAG.getTargetFrameIndex(*DerivedPtrLocation, + SD.getValueType()); + + // Be conservative: flush all pending loads + // TODO: Probably we can be less restrictive on this, + // it may allow more scheduling opportunities. + SDValue Chain = getRoot(); + + SDValue SpillLoad = + DAG.getLoad(SpillSlot.getValueType(), getCurSDLoc(), Chain, SpillSlot, + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), + *DerivedPtrLocation), + false, false, false, 0); + + // Again, be conservative, don't emit pending loads + DAG.setRoot(SpillLoad.getValue(1)); + + assert(SpillLoad.getNode()); + setValue(&Relocate, SpillLoad); +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h new file mode 100644 index 0000000..82d0c62 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -0,0 +1,116 @@ +//===-- StatepointLowering.h - SDAGBuilder's statepoint code -*- C++ -*---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes support code use by SelectionDAGBuilder when lowering a +// statepoint sequence in SelectionDAG IR. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H +#define LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include <vector> + +namespace llvm { +class SelectionDAGBuilder; + +/// This class tracks both per-statepoint and per-selectiondag information. 
+/// For each statepoint it tracks the locations of its gc values (incoming and
+/// relocated) and the list of gc_relocate calls scheduled for visiting (this
+/// is used for a debug mode consistency check only). The spill slot tracking
+/// works in concert with information in FunctionLoweringInfo.
+class StatepointLoweringState {
+public:
+  StatepointLoweringState() : NextSlotToAllocate(0) {}
+
+  /// Reset all state tracking for a newly encountered safepoint. Also
+  /// performs some consistency checking.
+  void startNewStatepoint(SelectionDAGBuilder &Builder);
+
+  /// Clear the memory usage of this object. This is called from
+  /// SelectionDAGBuilder::clear. We require this is never called in the
+  /// midst of processing a statepoint sequence.
+  void clear();
+
+  /// Returns the spill location of a value incoming to the current
+  /// statepoint. Will return SDValue() if this value hasn't been
+  /// spilled. Otherwise, the value has already been spilled and no
+  /// further action is required by the caller.
+  SDValue getLocation(SDValue val) {
+    if (!Locations.count(val))
+      return SDValue();
+    return Locations[val];
+  }
+  void setLocation(SDValue val, SDValue Location) {
+    assert(!Locations.count(val) &&
+           "Trying to allocate already allocated location");
+    Locations[val] = Location;
+  }
+
+  /// Record the fact that we expect to encounter a given gc_relocate
+  /// before the next statepoint. If we don't see it, we'll report
+  /// an assertion.
+  void scheduleRelocCall(const CallInst &RelocCall) {
+    PendingGCRelocateCalls.push_back(&RelocCall);
+  }
+  /// Remove this gc_relocate from the list we're expecting to see
+  /// before the next statepoint. If we weren't expecting to see
+  /// it, we'll report an assertion.
+  void relocCallVisited(const CallInst &RelocCall) {
+    SmallVectorImpl<const CallInst *>::iterator itr =
+        std::find(PendingGCRelocateCalls.begin(), PendingGCRelocateCalls.end(),
+                  &RelocCall);
+    assert(itr != PendingGCRelocateCalls.end() &&
+           "Visited unexpected gcrelocate call");
+    PendingGCRelocateCalls.erase(itr);
+  }
+
+  // TODO: Should add consistency tracking to ensure we encounter
+  // expected gc_result calls too.
+
+  /// Get a stack slot we can use to store a value of type ValueType. This
+  /// will hopefully be a recycled slot from another statepoint.
+  SDValue allocateStackSlot(EVT ValueType, SelectionDAGBuilder &Builder);
+
+  void reserveStackSlot(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    assert(!AllocatedStackSlots[Offset] && "already reserved!");
+    assert(NextSlotToAllocate <= (unsigned)Offset && "consistency!");
+    AllocatedStackSlots[Offset] = true;
+  }
+  bool isStackSlotAllocated(int Offset) {
+    assert(Offset >= 0 && Offset < (int)AllocatedStackSlots.size() &&
+           "out of bounds");
+    return AllocatedStackSlots[Offset];
+  }
+
+private:
+  /// Maps a pre-relocation value (a gc pointer directly incoming into the
+  /// statepoint) to its location (currently only stack slots)
+  DenseMap<SDValue, SDValue> Locations;
+
+  /// A boolean indicator for each slot listed in the FunctionInfo as to
+  /// whether it has been used in the current statepoint. Since we try to
+  /// preserve stack slots across safepoints, there can be gaps in which
+  /// slots have been allocated.
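+  /// For example (illustrative), if only the third recorded slot is reserved
+  /// for reuse by the current statepoint, this looks like
+  /// {false, false, true, false, ...}; later allocations then prefer the
+  /// remaining false entries (or create new slots once those run out).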
+ SmallVector<bool, 50> AllocatedStackSlots; + + /// Points just beyond the last slot known to have been allocated + unsigned NextSlotToAllocate; + + /// Keep track of pending gcrelocate calls for consistency check + SmallVector<const CallInst *, 10> PendingGCRelocateCalls; +}; +} // end namespace llvm + +#endif // LLVM_LIB_CODEGEN_SELECTIONDAG_STATEPOINTLOWERING_H diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp new file mode 100644 index 0000000..c64d882 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -0,0 +1,3083 @@ +//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLowering class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cctype> +using namespace llvm; + +/// NOTE: The TargetMachine owns TLOF. +TargetLowering::TargetLowering(const TargetMachine &tm) + : TargetLoweringBase(tm) {} + +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return nullptr; +} + +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) + .removeAttribute(Attribute::NoAlias).hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; + + // Check if the only use is a function return node. + return isUsedByReturnOnly(Node, Chain); +} + +/// \brief Set CallLoweringInfo attribute flags based on a call instruction +/// and called function attributes. 
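+/// For instance (illustrative), a call-site parameter carrying 'byval align 8'
+/// ends up with isByVal set and Alignment == 8, while the remaining flags stay
+/// false.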
+void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS, + unsigned AttrIdx) { + isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt); + isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt); + isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg); + isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet); + isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest); + isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal); + isInAlloca = CS->paramHasAttr(AttrIdx, Attribute::InAlloca); + isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned); + Alignment = CS->getParamAlignment(AttrIdx); +} + +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. +std::pair<SDValue, SDValue> +TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + ArrayRef<SDValue> Ops, + bool isSigned, SDLoc dl, + bool doesNotReturn, + bool isReturnValueUsed) const { + TargetLowering::ArgListTy Args; + Args.reserve(Ops.size()); + + TargetLowering::ArgListEntry Entry; + for (SDValue Op : Ops) { + Entry.Node = Op; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Entry.isZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned); + Args.push_back(Entry); + } + + if (LC == RTLIB::UNKNOWN_LIBCALL) + report_fatal_error("Unsupported library call operation!"); + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy(DAG.getDataLayout())); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering::CallLoweringInfo CLI(DAG); + bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned); + CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) + .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) + .setSExtResult(signExtend).setZExtResult(!signExtend); + return LowerCallTo(CLI); +} + +/// Soften the operands of a comparison. This code is shared among BR_CC, +/// SELECT_CC, and SETCC handlers. +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + SDLoc dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + bool ShouldInvertCC = false; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : + (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? 
RTLIB::UO_F64 : RTLIB::UO_F128; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : + (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + break; + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUEQ: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: + // Invert CC for unordered comparisons + ShouldInvertCC = true; + switch (CCCode) { + case ISD::SETULT: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETULE: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUGT: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETUGE: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + default: llvm_unreachable("Do not know how to soften this setcc!"); + } + } + + // Use the target specific return value for comparions lib calls. + EVT RetVT = getCmpLibcallReturnType(); + SDValue Ops[2] = {NewLHS, NewRHS}; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/, + dl).first; + NewRHS = DAG.getConstant(0, dl, RetVT); + + CCCode = getCmpLibcallCC(LC1); + if (ShouldInvertCC) + CCCode = getSetCCInverse(CCCode, /*isInteger=*/true); + + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/, + dl).first; + NewLHS = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); + } +} + +/// Return the entry encoding for a jump table in the current function. The +/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. +unsigned TargetLowering::getJumpTableEncoding() const { + // In non-pic modes, just use the address of a block. + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) + return MachineJumpTableInfo::EK_BlockAddress; + + // In PIC mode, if the target supports a GPRel32 directive, use it. + if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr) + return MachineJumpTableInfo::EK_GPRel32BlockAddress; + + // Otherwise, use a label difference. + return MachineJumpTableInfo::EK_LabelDifference32; +} + +SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table, + SelectionDAG &DAG) const { + // If our PIC model is GP relative, use the global offset table as the base. + unsigned JTEncoding = getJumpTableEncoding(); + + if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) || + (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress)) + return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout())); + + return Table; +} + +/// This returns the relocation base for the given PIC jumptable, the same as +/// getPICJumpTableRelocBase, but as an MCExpr. 
+const MCExpr * +TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + unsigned JTI,MCContext &Ctx) const{ + // The normal PIC reloc base is the label at the start of the jump table. + return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx); +} + +bool +TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { + // Assume that everything is safe in static mode. + if (getTargetMachine().getRelocationModel() == Reloc::Static) + return true; + + // In dynamic-no-pic mode, assume that known defined values are safe. + if (getTargetMachine().getRelocationModel() == Reloc::DynamicNoPIC && + GA && GA->getGlobal()->isStrongDefinitionForLinker()) + return true; + + // Otherwise assume nothing is safe. + return false; +} + +//===----------------------------------------------------------------------===// +// Optimization Methods +//===----------------------------------------------------------------------===// + +/// Check to see if the specified operand of the specified instruction is a +/// constant integer. If so, check to see if there are any bits set in the +/// constant that are not demanded. If so, shrink the constant and return true. +bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op, + const APInt &Demanded) { + SDLoc dl(Op); + + // FIXME: ISD::SELECT, ISD::SELECT_CC + switch (Op.getOpcode()) { + default: break; + case ISD::XOR: + case ISD::AND: + case ISD::OR: { + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + if (!C) return false; + + if (Op.getOpcode() == ISD::XOR && + (C->getAPIntValue() | (~Demanded)).isAllOnesValue()) + return false; + + // if we can expand it to have all bits set, do it + if (C->getAPIntValue().intersects(~Demanded)) { + EVT VT = Op.getValueType(); + SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0), + DAG.getConstant(Demanded & + C->getAPIntValue(), + dl, VT)); + return CombineTo(Op, New); + } + + break; + } + } + + return false; +} + +/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. +/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be +/// generalized for targets with other types of implicit widening casts. +bool +TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, + unsigned BitWidth, + const APInt &Demanded, + SDLoc dl) { + assert(Op.getNumOperands() == 2 && + "ShrinkDemandedOp only supports binary operators!"); + assert(Op.getNode()->getNumValues() == 1 && + "ShrinkDemandedOp only supports nodes with one result!"); + + // Early return, as this function cannot handle vector types. + if (Op.getValueType().isVector()) + return false; + + // Don't do this if the node has another user, which may require the + // full value. + if (!Op.getNode()->hasOneUse()) + return false; + + // Search for the smallest integer type with free casts to and from + // Op's type. For expedience, just check power-of-2 integer types. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned SmallVTBits = DemandedSize; + if (!isPowerOf2_32(SmallVTBits)) + SmallVTBits = NextPowerOf2(SmallVTBits); + for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { + EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits); + if (TLI.isTruncateFree(Op.getValueType(), SmallVT) && + TLI.isZExtFree(SmallVT, Op.getValueType())) { + // We found a type with free casts. 
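+      // Illustrative example: for an i64 ADD where only the low 13 bits are
+      // demanded and i16 has free trunc/zext, the node is rebuilt below as
+      //   (i64 any_extend (i16 add (trunc x), (trunc y)))
+      // since every demanded bit fits inside the narrower type.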
+ SDValue X = DAG.getNode(Op.getOpcode(), dl, SmallVT, + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(0)), + DAG.getNode(ISD::TRUNCATE, dl, SmallVT, + Op.getNode()->getOperand(1))); + bool NeedZext = DemandedSize > SmallVTBits; + SDValue Z = DAG.getNode(NeedZext ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, + dl, Op.getValueType(), X); + return CombineTo(Op, Z); + } + } + return false; +} + +/// Look at Op. At this point, we know that only the DemandedMask bits of the +/// result of Op are ever used downstream. If we can use this information to +/// simplify Op, create a new simplified DAG node and return true, returning the +/// original and new nodes in Old and New. Otherwise, analyze the expression and +/// return a mask of KnownOne and KnownZero bits for the expression (used to +/// simplify the caller). The KnownZero/One bits may only be accurate for those +/// bits in the DemandedMask. +bool TargetLowering::SimplifyDemandedBits(SDValue Op, + const APInt &DemandedMask, + APInt &KnownZero, + APInt &KnownOne, + TargetLoweringOpt &TLO, + unsigned Depth) const { + unsigned BitWidth = DemandedMask.getBitWidth(); + assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth && + "Mask size mismatches value type size!"); + APInt NewMask = DemandedMask; + SDLoc dl(Op); + auto &DL = TLO.DAG.getDataLayout(); + + // Don't know anything. + KnownZero = KnownOne = APInt(BitWidth, 0); + + // Other users may use these bits. + if (!Op.getNode()->hasOneUse()) { + if (Depth != 0) { + // If not at the root, Just compute the KnownZero/KnownOne bits to + // simplify things downstream. + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + return false; + } + // If this is the root being simplified, allow it to have multiple uses, + // just set the NewMask to all bits. + NewMask = APInt::getAllOnesValue(BitWidth); + } else if (DemandedMask == 0) { + // Not demanding any bits from Op. + if (Op.getOpcode() != ISD::UNDEF) + return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType())); + return false; + } else if (Depth == 6) { // Limit search depth. + return false; + } + + APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; + switch (Op.getOpcode()) { + case ISD::Constant: + // We know all of the bits for a constant! + KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue(); + KnownZero = ~KnownOne; + return false; // Don't fall through, will infinitely loop. + case ISD::AND: + // If the RHS is a constant, check to see if the LHS would be zero without + // using the bits from the RHS. Below, we use knowledge about the RHS to + // simplify the LHS, here we're using information from the LHS to simplify + // the RHS. + if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt LHSZero, LHSOne; + // Do not increment Depth here; that can cause an infinite loop. + TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth); + // If the LHS already has zeros where RHSC does, this and is dead. + if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + // If any of the set bits in the RHS are known zero on the LHS, shrink + // the constant. 
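+      // Illustrative example: for (and x, 0xFF00) where the low byte of x is
+      // already known zero and only bits 8..11 are demanded, the mask passed
+      // in is 0x0F00 and the constant can shrink from 0xFF00 to 0x0F00.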
+ if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) + return true; + } + + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known one on one side, return the other. + // These bits cannot contribute to the result of the 'and'. + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the demanded bits in the inputs are known zeros, return zero. + if ((NewMask & (KnownZero|KnownZero2)) == NewMask) + return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, Op.getValueType())); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-1 bits are only known if set in both the LHS & RHS. + KnownOne &= KnownOne2; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + KnownZero |= KnownZero2; + break; + case ISD::OR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, + KnownZero2, KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. + // These bits cannot contribute to the result of the 'or'. + if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If all of the potentially set bits on one side are known to be set on + // the other side, just use the 'other' side. + if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the RHS is a constant, see if we can simplify it. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // Output known-0 bits are only known if clear in both the LHS & RHS. + KnownZero &= KnownZero2; + // Output known-1 are known to be set if set in either the LHS | RHS. + KnownOne |= KnownOne2; + break; + case ISD::XOR: + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If all of the demanded bits are known zero on one side, return the other. 
+ // These bits cannot contribute to the result of the 'xor'. + if ((KnownZero & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(0)); + if ((KnownZero2 & NewMask) == NewMask) + return TLO.CombineTo(Op, Op.getOperand(1)); + // If the operation can be done in a smaller type, do so. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + + // If all of the unknown bits are known to be zero on one side or the other + // (but not both) turn this into an *inclusive* or. + // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 + if ((NewMask & ~KnownZero & ~KnownZero2) == 0) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(), + Op.getOperand(0), + Op.getOperand(1))); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); + + // If all of the demanded bits on one side are known, and all of the set + // bits on that side are also known to be set on the other side, turn this + // into an AND, as we know the bits will be cleared. + // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 + // NB: it is okay if more bits are known than are requested + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side + if (KnownOne == KnownOne2) { // set bits are the same on both sides + EVT VT = Op.getValueType(); + SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, dl, VT); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, + Op.getOperand(0), ANDC)); + } + } + + // If the RHS is a constant, see if we can simplify it. + // for XOR, we prefer to force bits to 1 if they will make a -1. + // if we can't force bits, try to shrink constant + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt Expanded = C->getAPIntValue() | (~NewMask); + // if we can expand it to have all bits set, do it + if (Expanded.isAllOnesValue()) { + if (Expanded != C->getAPIntValue()) { + EVT VT = Op.getValueType(); + SDValue New = TLO.DAG.getNode(Op.getOpcode(), dl,VT, Op.getOperand(0), + TLO.DAG.getConstant(Expanded, dl, VT)); + return TLO.CombineTo(Op, New); + } + // if it already has all the bits set, nothing to change + // but don't shrink either! + } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { + return true; + } + } + + KnownZero = KnownZeroOut; + KnownOne = KnownOneOut; + break; + case ISD::SELECT: + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. 
+ KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SELECT_CC: + if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If the operands are constants, see if we can simplify them. + if (TLO.ShrinkDemandedConstant(Op, NewMask)) + return true; + + // Only known if known in both the LHS and RHS. + KnownOne &= KnownOne2; + KnownZero &= KnownZero2; + break; + case ISD::SHL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + unsigned ShAmt = SA->getZExtValue(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a + // single shift. We can do this if the bottom bits (which are shifted + // out) are never demanded. + if (InOp.getOpcode() == ISD::SRL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SHL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SRL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), + KnownZero, KnownOne, TLO, Depth+1)) + return true; + + // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits + // are not demanded. This will likely allow the anyext to be folded away. + if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) { + SDValue InnerOp = InOp.getNode()->getOperand(0); + EVT InnerVT = InnerOp.getValueType(); + unsigned InnerBits = InnerVT.getSizeInBits(); + if (ShAmt < InnerBits && NewMask.lshr(InnerBits) == 0 && + isTypeDesirableForOp(ISD::SHL, InnerVT)) { + EVT ShTy = getShiftAmountTy(InnerVT, DL); + if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) + ShTy = InnerVT; + SDValue NarrowShl = + TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp, + TLO.DAG.getConstant(ShAmt, dl, ShTy)); + return + TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), + NarrowShl)); + } + // Repeat the SHL optimization above in cases where an extension + // intervenes: (shl (anyext (shr x, c1)), c2) to + // (shl (anyext x), c2-c1). This requires that the bottom c1 bits + // aren't demanded (as above) and that the shifted upper c1 bits of + // x aren't demanded. 
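+        // Illustrative example, for an i32 result: if only bits 12..23 of
+        //   (shl (any_extend (srl x:i16, 4)), 12)
+        // are demanded, the node can become (shl (any_extend x), 8); both
+        // forms place x[4..15] at result bits 12..23.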
+ if (InOp.hasOneUse() && + InnerOp.getOpcode() == ISD::SRL && + InnerOp.hasOneUse() && + isa<ConstantSDNode>(InnerOp.getOperand(1))) { + uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1)) + ->getZExtValue(); + if (InnerShAmt < ShAmt && + InnerShAmt < InnerBits && + NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 && + NewMask.trunc(ShAmt) == 0) { + SDValue NewSA = + TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, + Op.getOperand(1).getValueType()); + EVT VT = Op.getValueType(); + SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, + InnerOp.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, + NewExt, NewSA)); + } + } + } + + KnownZero <<= SA->getZExtValue(); + KnownOne <<= SA->getZExtValue(); + // low bits known zero. + KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getZExtValue()); + } + break; + case ISD::SRL: + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + unsigned VTSize = VT.getSizeInBits(); + SDValue InOp = Op.getOperand(0); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). + if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt); + + // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a + // single shift. We can do this if the top bits (which are shifted out) + // are never demanded. + if (InOp.getOpcode() == ISD::SHL && + isa<ConstantSDNode>(InOp.getOperand(1))) { + if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { + unsigned C1= cast<ConstantSDNode>(InOp.getOperand(1))->getZExtValue(); + unsigned Opc = ISD::SRL; + int Diff = ShAmt-C1; + if (Diff < 0) { + Diff = -Diff; + Opc = ISD::SHL; + } + + SDValue NewSA = + TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, + InOp.getOperand(0), NewSA)); + } + } + + // Compute the new bits that are at the top now. + if (SimplifyDemandedBits(InOp, InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + KnownZero |= HighBits; // High bits known zero. + } + break; + case ISD::SRA: + // If this is an arithmetic shift right and only the low-bit is set, we can + // always convert this into a logical shr, even if the shift amount is + // variable. The low bit of the shift cannot be an input sign bit unless + // the shift amount is >= the size of the datatype, which is undefined. + if (NewMask == 1) + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); + + if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + EVT VT = Op.getValueType(); + unsigned ShAmt = SA->getZExtValue(); + + // If the shift count is an invalid immediate, don't do anything. + if (ShAmt >= BitWidth) + break; + + APInt InDemandedMask = (NewMask << ShAmt); + + // If the shift is exact, then it does demand the low bits (and knows that + // they are zero). 
+ if (cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()) + InDemandedMask |= APInt::getLowBitsSet(BitWidth, ShAmt); + + // If any of the demanded bits are produced by the sign extension, we also + // demand the input sign bit. + APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); + if (HighBits.intersects(NewMask)) + InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits()); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.lshr(ShAmt); + KnownOne = KnownOne.lshr(ShAmt); + + // Handle the sign bit, adjusted to where it is now in the mask. + APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); + + // If the input sign bit is known to be zero, or if none of the top bits + // are demanded, turn this into an unsigned shift right. + if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { + SDNodeFlags Flags; + Flags.setExact(cast<BinaryWithFlagsSDNode>(Op)->Flags.hasExact()); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0), + Op.getOperand(1), &Flags)); + } + + int Log2 = NewMask.exactLogBase2(); + if (Log2 >= 0) { + // The bit must come from the sign. + SDValue NewSA = + TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, + Op.getOperand(1).getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, + Op.getOperand(0), NewSA)); + } + + if (KnownOne.intersects(SignBit)) + // New bits are known one. + KnownOne |= HighBits; + } + break; + case ISD::SIGN_EXTEND_INREG: { + EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + + APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1); + // If we only care about the highest bit, don't bother shifting right. + if (MsbMask == NewMask) { + unsigned ShAmt = ExVT.getScalarType().getSizeInBits(); + SDValue InOp = Op.getOperand(0); + unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits(); + bool AlreadySignExtended = + TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1; + // However if the input is already sign extended we expect the sign + // extension to be dropped altogether later and do not simplify. + if (!AlreadySignExtended) { + // Compute the correct shift amount type, which must be getShiftAmountTy + // for scalar types after legalization. + EVT ShiftAmtTy = Op.getValueType(); + if (TLO.LegalTypes() && !ShiftAmtTy.isVector()) + ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL); + + SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ShAmt, dl, + ShiftAmtTy); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), InOp, + ShiftAmt)); + } + } + + // Sign extension. Compute the demanded bits in the result that are not + // present in the input. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, + BitWidth - ExVT.getScalarType().getSizeInBits()); + + // If none of the extended bits are demanded, eliminate the sextinreg. + if ((NewBits & NewMask) == 0) + return TLO.CombineTo(Op, Op.getOperand(0)); + + APInt InSignBit = + APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth); + APInt InputDemandedBits = + APInt::getLowBitsSet(BitWidth, + ExVT.getScalarType().getSizeInBits()) & + NewMask; + + // Since the sign extended bits are demanded, we know that the sign + // bit is demanded. 
+ InputDemandedBits |= InSignBit; + + if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + // If the sign bit of the input is known set or clear, then we know the + // top bits of the result. + + // If the input sign bit is known zero, convert this into a zero extension. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, + TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT)); + + if (KnownOne.intersects(InSignBit)) { // Input sign bit known set + KnownOne |= NewBits; + KnownZero &= ~NewBits; + } else { // Input sign bit unknown + KnownZero &= ~NewBits; + KnownOne &= ~NewBits; + } + break; + } + case ISD::BUILD_PAIR: { + EVT HalfVT = Op.getOperand(0).getValueType(); + unsigned HalfBitWidth = HalfVT.getScalarSizeInBits(); + + APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth); + APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth); + + APInt KnownZeroLo, KnownOneLo; + APInt KnownZeroHi, KnownOneHi; + + if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownZeroLo, + KnownOneLo, TLO, Depth + 1)) + return true; + + if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownZeroHi, + KnownOneHi, TLO, Depth + 1)) + return true; + + KnownZero = KnownZeroLo.zext(BitWidth) | + KnownZeroHi.zext(BitWidth).shl(HalfBitWidth); + + KnownOne = KnownOneLo.zext(BitWidth) | + KnownOneHi.zext(BitWidth).shl(HalfBitWidth); + break; + } + case ISD::ZERO_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + + // If none of the top bits are demanded, convert this into an any_extend. + APInt NewBits = + APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; + if (!NewBits.intersects(NewMask)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + KnownZero |= NewBits; + break; + } + case ISD::SIGN_EXTEND: { + EVT InVT = Op.getOperand(0).getValueType(); + unsigned InBits = InVT.getScalarType().getSizeInBits(); + APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); + APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); + APInt NewBits = ~InMask & NewMask; + + // If none of the top bits are demanded, convert this into an any_extend. + if (NewBits == 0) + return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // Since some of the sign extended bits are demanded, we know that the sign + // bit is demanded. + APInt InDemandedBits = InMask & NewMask; + InDemandedBits |= InSignBit; + InDemandedBits = InDemandedBits.trunc(InBits); + + if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, + KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + + // If the sign bit is known zero, convert this to a zero extend. + if (KnownZero.intersects(InSignBit)) + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, + Op.getValueType(), + Op.getOperand(0))); + + // If the sign bit is known one, the top bits match. 
+ if (KnownOne.intersects(InSignBit)) { + KnownOne |= NewBits; + assert((KnownZero & NewBits) == 0); + } else { // Otherwise, top bits aren't known. + assert((KnownOne & NewBits) == 0); + assert((KnownZero & NewBits) == 0); + } + break; + } + case ISD::ANY_EXTEND: { + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt InMask = NewMask.trunc(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), InMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + KnownZero = KnownZero.zext(BitWidth); + KnownOne = KnownOne.zext(BitWidth); + break; + } + case ISD::TRUNCATE: { + // Simplify the input, using demanded bit information, and compute the known + // zero/one bits live out. + unsigned OperandBitWidth = + Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + APInt TruncMask = NewMask.zext(OperandBitWidth); + if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + KnownZero = KnownZero.trunc(BitWidth); + KnownOne = KnownOne.trunc(BitWidth); + + // If the input is only used by this truncate, see if we can shrink it based + // on the known demanded bits. + if (Op.getOperand(0).getNode()->hasOneUse()) { + SDValue In = Op.getOperand(0); + switch (In.getOpcode()) { + default: break; + case ISD::SRL: + // Shrink SRL by a constant if none of the high bits shifted in are + // demanded. + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1)); + if (!ShAmt) + break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = TLO.DAG.getConstant(ShVal, dl, + getShiftAmountTy(Op.getValueType(), DL)); + } + + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + Shift)); + } + break; + } + } + + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + break; + } + case ISD::AssertZext: { + // AssertZext demands all of the high bits, plus any of the low bits + // demanded by its users. + EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); + if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask, + KnownZero, KnownOne, TLO, Depth+1)) + return true; + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + KnownZero |= ~InMask & NewMask; + break; + } + case ISD::BITCAST: + // If this is an FP->Int bitcast and if the sign bit is the only + // thing demanded, turn this into a FGETSIGN. 
+ if (!TLO.LegalOperations() && + !Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && + NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && + Op.getOperand(0).getValueType().isFloatingPoint()) { + bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); + bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. + EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; + // Make a FGETSIGN + SHL to move the sign bit into the appropriate + // place. We expect the SHL to be eliminated by other optimizations. + SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0)); + unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits(); + if (!OpVTLegal && OpVTSizeInBits > 32) + Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign); + unsigned ShVal = Op.getValueType().getSizeInBits()-1; + SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType()); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, + Op.getValueType(), + Sign, ShAmt)); + } + } + break; + case ISD::ADD: + case ISD::MUL: + case ISD::SUB: { + // Add, Sub, and Mul don't demand any bits in positions beyond that + // of the highest bit demanded of them. + APInt LoMask = APInt::getLowBitsSet(BitWidth, + BitWidth - NewMask.countLeadingZeros()); + if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2, + KnownOne2, TLO, Depth+1)) + return true; + // See if the operation should be performed at a smaller bit width. + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + return true; + } + // FALL THROUGH + default: + // Just use computeKnownBits to compute output bits. + TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth); + break; + } + + // If we know the value of all of the demanded bits, return this as a + // constant. + if ((NewMask & (KnownZero|KnownOne)) == NewMask) { + // Avoid folding to a constant if any OpaqueConstant is involved. + const SDNode *N = Op.getNode(); + for (SDNodeIterator I = SDNodeIterator::begin(N), + E = SDNodeIterator::end(N); I != E; ++I) { + SDNode *Op = *I; + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) + if (C->isOpaque()) + return false; + } + return TLO.CombineTo(Op, + TLO.DAG.getConstant(KnownOne, dl, Op.getValueType())); + } + + return false; +} + +/// Determine which of the bits specified in Mask are known to be either zero or +/// one and return them in the KnownZero/KnownOne bitsets. +void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, + APInt &KnownZero, + APInt &KnownOne, + const SelectionDAG &DAG, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use MaskedValueIsZero if you don't know whether Op" + " is a target node!"); + KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); +} + +/// This method can be implemented by targets that want to expose additional +/// information about sign bits to the DAG Combiner. 
+unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, + const SelectionDAG &, + unsigned Depth) const { + assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || + Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || + Op.getOpcode() == ISD::INTRINSIC_VOID) && + "Should use ComputeNumSignBits if you don't know whether Op" + " is a target node!"); + return 1; +} + +/// Test if the given value is known to have exactly one bit set. This differs +/// from computeKnownBits in that it doesn't need to determine which bit is set. +static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) { + // A left-shift of a constant one will have exactly one bit set, because + // shifting the bit off the end is undefined. + if (Val.getOpcode() == ISD::SHL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue() == 1) + return true; + + // Similarly, a right-shift of a constant sign-bit will have exactly + // one bit set. + if (Val.getOpcode() == ISD::SRL) + if (ConstantSDNode *C = + dyn_cast<ConstantSDNode>(Val.getNode()->getOperand(0))) + if (C->getAPIntValue().isSignBit()) + return true; + + // More could be done here, though the above checks are enough + // to handle some common cases. + + // Fall back to computeKnownBits to catch other known cases. + EVT OpVT = Val.getValueType(); + unsigned BitWidth = OpVT.getScalarType().getSizeInBits(); + APInt KnownZero, KnownOne; + DAG.computeKnownBits(Val, KnownZero, KnownOne); + return (KnownZero.countPopulation() == BitWidth - 1) && + (KnownOne.countPopulation() == 1); +} + +bool TargetLowering::isConstTrueVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; + } + + switch (getBooleanContents(N->getValueType(0))) { + case UndefinedBooleanContent: + return CN->getAPIntValue()[0]; + case ZeroOrOneBooleanContent: + return CN->isOne(); + case ZeroOrNegativeOneBooleanContent: + return CN->isAllOnesValue(); + } + + llvm_unreachable("Invalid boolean contents"); +} + +bool TargetLowering::isConstFalseVal(const SDNode *N) const { + if (!N) + return false; + + const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); + if (!CN) { + const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); + if (!BV) + return false; + + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; + } + + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) + return !CN->getAPIntValue()[0]; + + return CN->isNullValue(); +} + +/// Try to simplify a setcc built with the specified operands and cc. If it is +/// unable to simplify it, return a null SDValue. +SDValue +TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, + ISD::CondCode Cond, bool foldBooleans, + DAGCombinerInfo &DCI, SDLoc dl) const { + SelectionDAG &DAG = DCI.DAG; + + // These setcc operations always fold. 
+ switch (Cond) { + default: break; + case ISD::SETFALSE: + case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT); + case ISD::SETTRUE: + case ISD::SETTRUE2: { + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); + return DAG.getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl, + VT); + } + } + + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond); + if (isa<ConstantSDNode>(N0.getNode()) && + (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(SwappedCC, N0.getSimpleValueType()))) + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC); + + if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) { + const APInt &C1 = N1C->getAPIntValue(); + + // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an + // equality comparison, then we're just comparing whether X itself is + // zero. + if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) && + N0.getOperand(0).getOpcode() == ISD::CTLZ && + N0.getOperand(1).getOpcode() == ISD::Constant) { + const APInt &ShAmt + = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + ShAmt == Log2_32(N0.getValueType().getSizeInBits())) { + if ((C1 == 0) == (Cond == ISD::SETEQ)) { + // (srl (ctlz x), 5) == 0 -> X != 0 + // (srl (ctlz x), 5) != 1 -> X != 0 + Cond = ISD::SETNE; + } else { + // (srl (ctlz x), 5) != 0 -> X == 0 + // (srl (ctlz x), 5) == 1 -> X == 0 + Cond = ISD::SETEQ; + } + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); + return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), + Zero, Cond); + } + } + + SDValue CTPOP = N0; + // Look through truncs that don't change the value of a ctpop. + if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE) + CTPOP = N0.getOperand(0); + + if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP && + (N0 == CTPOP || N0.getValueType().getSizeInBits() > + Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) { + EVT CTVT = CTPOP.getValueType(); + SDValue CTOp = CTPOP.getOperand(0); + + // (ctpop x) u< 2 -> (x & x-1) == 0 + // (ctpop x) u> 1 -> (x & x-1) != 0 + if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){ + SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp, + DAG.getConstant(1, dl, CTVT)); + SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub); + ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE; + return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC); + } + + // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. 
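+      // A quick compile-time illustration of the "x & (x - 1)" idiom used by
+      // the two transforms above: it clears the lowest set bit, so the result
+      // is zero exactly when x has fewer than two bits set. The constants are
+      // arbitrary sample values, not anything required by the transform.
+      static_assert((8u & (8u - 1u)) == 0u, "one bit set -> popcount u< 2");
+      static_assert((12u & (12u - 1u)) != 0u, "two bits set -> popcount u> 1");
+      static_assert((0u & (0u - 1u)) == 0u, "zero also satisfies popcount u< 2");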
+ } + + // (zext x) == C --> x == (trunc C) + // (sext x) == C --> x == (trunc C) + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + DCI.isBeforeLegalize() && N0->hasOneUse()) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreExt; + bool Signed = false; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreExt = N0->getOperand(0); + } + } else if (N0->getOpcode() == ISD::SIGN_EXTEND) { + // SExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreExt = N0->getOperand(0); + Signed = true; + } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) { + // ZEXTLOAD / SEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; + } else if (LN0->getExtensionType() == ISD::SEXTLOAD) { + Signed = true; + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreExt = N0; + } + } + + // Figure out how many bits we need to preserve this constant. + unsigned ReqdBits = Signed ? + C1.getBitWidth() - C1.getNumSignBits() + 1 : + C1.getActiveBits(); + + // Make sure we're not losing bits from the constant. + if (MinBits > 0 && + MinBits < C1.getBitWidth() && + MinBits >= ReqdBits) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + + // If the LHS is '(and load, const)', the RHS is 0, + // the test is for equality or unsigned, and all 1 bits of the const are + // in the same partial word, see if we can shorten the load. + if (DCI.isBeforeLegalize() && + !ISD::isSignedIntSetCC(Cond) && + N0.getOpcode() == ISD::AND && C1 == 0 && + N0.getNode()->hasOneUse() && + isa<LoadSDNode>(N0.getOperand(0)) && + N0.getOperand(0).getNode()->hasOneUse() && + isa<ConstantSDNode>(N0.getOperand(1))) { + LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0)); + APInt bestMask; + unsigned bestWidth = 0, bestOffset = 0; + if (!Lod->isVolatile() && Lod->isUnindexed()) { + unsigned origWidth = N0.getValueType().getSizeInBits(); + unsigned maskWidth = origWidth; + // We can narrow (e.g.) 16-bit extending loads on 32-bit target to + // 8 bits, but have to be careful... 
+ if (Lod->getExtensionType() != ISD::NON_EXTLOAD) + origWidth = Lod->getMemoryVT().getSizeInBits(); + const APInt &Mask = + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); + for (unsigned width = origWidth / 2; width>=8; width /= 2) { + APInt newMask = APInt::getLowBitsSet(maskWidth, width); + for (unsigned offset=0; offset<origWidth/width; offset++) { + if ((newMask & Mask) == Mask) { + if (!DAG.getDataLayout().isLittleEndian()) + bestOffset = (origWidth/width - offset - 1) * (width/8); + else + bestOffset = (uint64_t)offset * (width/8); + bestMask = Mask.lshr(offset * (width/8) * 8); + bestWidth = width; + break; + } + newMask = newMask << width; + } + } + } + if (bestWidth) { + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); + if (newVT.isRound()) { + EVT PtrType = Lod->getOperand(1).getValueType(); + SDValue Ptr = Lod->getBasePtr(); + if (bestOffset != 0) + Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(), + DAG.getConstant(bestOffset, dl, PtrType)); + unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); + SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, + Lod->getPointerInfo().getWithOffset(bestOffset), + false, false, false, NewAlign); + return DAG.getSetCC(dl, VT, + DAG.getNode(ISD::AND, dl, newVT, NewLoad, + DAG.getConstant(bestMask.trunc(bestWidth), + dl, newVT)), + DAG.getConstant(0LL, dl, newVT), Cond); + } + } + } + + // If the LHS is a ZERO_EXTEND, perform the comparison on the input. + if (N0.getOpcode() == ISD::ZERO_EXTEND) { + unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits(); + + // If the comparison constant has bits in the upper part, the + // zero-extended value could never match. + if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(), + C1.getBitWidth() - InSize))) { + switch (Cond) { + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETEQ: return DAG.getConstant(0, dl, VT); + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETNE: return DAG.getConstant(1, dl, VT); + case ISD::SETGT: + case ISD::SETGE: + // True if the sign bit of C1 is set. + return DAG.getConstant(C1.isNegative(), dl, VT); + case ISD::SETLT: + case ISD::SETLE: + // True if the sign bit of C1 isn't set. + return DAG.getConstant(C1.isNonNegative(), dl, VT); + default: + break; + } + } + + // Otherwise, we can perform the comparison with the low bits. + switch (Cond) { + case ISD::SETEQ: + case ISD::SETNE: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULT: + case ISD::SETULE: { + EVT newVT = N0.getOperand(0).getValueType(); + if (DCI.isBeforeLegalizeOps() || + (isOperationLegal(ISD::SETCC, newVT) && + getCondCodeAction(Cond, newVT.getSimpleVT()) == Legal)) { + EVT NewSetCCVT = + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT); + SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT); + + SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), + NewConst, Cond); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); + } + break; + } + default: + break; // todo, be more careful with signed comparisons + } + } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT(); + unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits(); + EVT ExtDstTy = N0.getValueType(); + unsigned ExtDstTyBits = ExtDstTy.getSizeInBits(); + + // If the constant doesn't fit into the number of bits for the source of + // the sign extension, it is impossible for both sides to be equal. 
+ if (C1.getMinSignedBits() > ExtSrcTyBits) + return DAG.getConstant(Cond == ISD::SETNE, dl, VT); + + SDValue ZextOp; + EVT Op0Ty = N0.getOperand(0).getValueType(); + if (Op0Ty == ExtSrcTy) { + ZextOp = N0.getOperand(0); + } else { + APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits); + ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0), + DAG.getConstant(Imm, dl, Op0Ty)); + } + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(ZextOp.getNode()); + // Otherwise, make this a use of a zext. + return DAG.getSetCC(dl, VT, ZextOp, + DAG.getConstant(C1 & APInt::getLowBitsSet( + ExtDstTyBits, + ExtSrcTyBits), + dl, ExtDstTy), + Cond); + } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC + if (N0.getOpcode() == ISD::SETCC && + isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) { + bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1); + if (TrueWhenTrue) + return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); + // Invert the condition. + ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); + CC = ISD::getSetCCInverse(CC, + N0.getOperand(0).getValueType().isInteger()); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); + } + + if ((N0.getOpcode() == ISD::XOR || + (N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR && + N0.getOperand(1) == N0.getOperand(0).getOperand(1))) && + isa<ConstantSDNode>(N0.getOperand(1)) && + cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We + // can only do this if the top bits are known zero. + unsigned BitWidth = N0.getValueSizeInBits(); + if (DAG.MaskedValueIsZero(N0, + APInt::getHighBitsSet(BitWidth, + BitWidth-1))) { + // Okay, get the un-inverted input value. + SDValue Val; + if (N0.getOpcode() == ISD::XOR) + Val = N0.getOperand(0); + else { + assert(N0.getOpcode() == ISD::AND && + N0.getOperand(0).getOpcode() == ISD::XOR); + // ((X^1)&1)^1 -> X & 1 + Val = DAG.getNode(ISD::AND, dl, N0.getValueType(), + N0.getOperand(0).getOperand(0), + N0.getOperand(1)); + } + + return DAG.getSetCC(dl, VT, Val, N1, + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } else if (N1C->getAPIntValue() == 1 && + (VT == MVT::i1 || + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { + SDValue Op0 = N0; + if (Op0.getOpcode() == ISD::TRUNCATE) + Op0 = Op0.getOperand(0); + + if ((Op0.getOpcode() == ISD::XOR) && + Op0.getOperand(0).getOpcode() == ISD::SETCC && + Op0.getOperand(1).getOpcode() == ISD::SETCC) { + // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc) + Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; + return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), + Cond); + } + if (Op0.getOpcode() == ISD::AND && + isa<ConstantSDNode>(Op0.getOperand(1)) && + cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) { + // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. 
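+          // Illustration only (arbitrary sample constants): for any x,
+          // (x & 1) == 1 holds exactly when (x & 1) != 0, so the two forms
+          // are interchangeable.
+          static_assert(((5u & 1u) == 1u) == ((5u & 1u) != 0u), "odd x: both forms true");
+          static_assert(((4u & 1u) == 1u) == ((4u & 1u) != 0u), "even x: both forms false");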
+ if (Op0.getValueType().bitsGT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, dl, VT)); + else if (Op0.getValueType().bitsLT(VT)) + Op0 = DAG.getNode(ISD::AND, dl, VT, + DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)), + DAG.getConstant(1, dl, VT)); + + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, dl, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + if (Op0.getOpcode() == ISD::AssertZext && + cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1) + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, dl, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); + } + } + + APInt MinVal, MaxVal; + unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits(); + if (ISD::isSignedIntSetCC(Cond)) { + MinVal = APInt::getSignedMinValue(OperandBitSize); + MaxVal = APInt::getSignedMaxValue(OperandBitSize); + } else { + MinVal = APInt::getMinValue(OperandBitSize); + MaxVal = APInt::getMaxValue(OperandBitSize); + } + + // Canonicalize GE/LE comparisons to use GT/LT comparisons. + if (Cond == ISD::SETGE || Cond == ISD::SETUGE) { + if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true + // X >= C0 --> X > (C0 - 1) + APInt C = C1 - 1; + ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + + if (Cond == ISD::SETLE || Cond == ISD::SETULE) { + if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true + // X <= C0 --> X < (C0 + 1) + APInt C = C1 + 1; + ISD::CondCode NewCC = (Cond == ISD::SETLE) ? 
ISD::SETLT : ISD::SETULT; + if ((DCI.isBeforeLegalizeOps() || + isCondCodeLegal(NewCC, VT.getSimpleVT())) && + (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 && + isLegalICmpImmediate(C.getSExtValue())))) { + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(C, dl, N1.getValueType()), + NewCC); + } + } + + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal) + return DAG.getConstant(0, dl, VT); // X < MIN --> false + if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal) + return DAG.getConstant(1, dl, VT); // X >= MIN --> true + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal) + return DAG.getConstant(0, dl, VT); // X > MAX --> false + if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal) + return DAG.getConstant(1, dl, VT); // X <= MAX --> true + + // Canonicalize setgt X, Min --> setne X, Min + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + // Canonicalize setlt X, Max --> setne X, Max + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE); + + // If we have setult X, 1, turn it into seteq X, 0 + if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MinVal, dl, N0.getValueType()), + ISD::SETEQ); + // If we have setugt X, Max-1, turn it into seteq X, Max + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(MaxVal, dl, N0.getValueType()), + ISD::SETEQ); + + // If we have "setcc X, C0", check to see if we can shrink the immediate + // by changing cc. + + // SETUGT X, SINTMAX -> SETLT X, 0 + if (Cond == ISD::SETUGT && + C1 == APInt::getSignedMaxValue(OperandBitSize)) + return DAG.getSetCC(dl, VT, N0, + DAG.getConstant(0, dl, N1.getValueType()), + ISD::SETLT); + + // SETULT X, SINTMIN -> SETGT X, -1 + if (Cond == ISD::SETULT && + C1 == APInt::getSignedMinValue(OperandBitSize)) { + SDValue ConstMinusOne = + DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl, + N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT); + } + + // Fold bit comparisons when we can. + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + (VT == N0.getValueType() || + (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + N0.getOpcode() == ISD::AND) { + auto &DL = DAG.getDataLayout(); + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); + if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 + // Perform the xform if the AND RHS is a single bit. + if (AndRHS->getAPIntValue().isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl, + ShiftTy))); + } + } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) { + // (X & 8) == 8 --> (X & 8) >> 3 + // Perform the xform if C1 is a single bit. 
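+        // Illustration only (the mask 8 and the sample values of X are
+        // arbitrary): testing (X & 8) == 8 yields the same 0/1 value as
+        // extracting the bit directly with (X & 8) >> 3, which is what the
+        // shift-plus-truncate built below produces.
+        static_assert((((12u & 8u) == 8u) ? 1u : 0u) == ((12u & 8u) >> 3), "bit 3 set");
+        static_assert((((3u & 8u) == 8u) ? 1u : 0u) == ((3u & 8u) >> 3), "bit 3 clear");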
+ if (C1.isPowerOf2()) { + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(C1.logBase2(), dl, + ShiftTy))); + } + } + } + } + + if (C1.getMinSignedBits() <= 64 && + !isLegalICmpImmediate(C1.getSExtValue())) { + // (X & -256) == 256 -> (X >> 8) == 1 + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + const APInt &AndRHSC = AndRHS->getAPIntValue(); + if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) { + unsigned ShiftBits = AndRHSC.countTrailingZeros(); + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, + ShiftTy)); + SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond); + } + } + } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE || + Cond == ISD::SETULE || Cond == ISD::SETUGT) { + bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT); + // X < 0x100000000 -> (X >> 32) < 1 + // X >= 0x100000000 -> (X >> 32) >= 1 + // X <= 0x0ffffffff -> (X >> 32) < 1 + // X > 0x0ffffffff -> (X >> 32) >= 1 + unsigned ShiftBits; + APInt NewC = C1; + ISD::CondCode NewCond = Cond; + if (AdjOne) { + ShiftBits = C1.countTrailingOnes(); + NewC = NewC + 1; + NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; + } else { + ShiftBits = C1.countTrailingZeros(); + } + NewC = NewC.lshr(ShiftBits); + if (ShiftBits && NewC.getMinSignedBits() <= 64 && + isLegalICmpImmediate(NewC.getSExtValue())) { + auto &DL = DAG.getDataLayout(); + EVT ShiftTy = DCI.isBeforeLegalize() + ? getPointerTy(DL) + : getShiftAmountTy(N0.getValueType(), DL); + EVT CmpTy = N0.getValueType(); + SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0, + DAG.getConstant(ShiftBits, dl, ShiftTy)); + SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy); + return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond); + } + } + } + } + + if (isa<ConstantFPSDNode>(N0.getNode())) { + // Constant fold or commute setcc. + SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl); + if (O.getNode()) return O; + } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) { + // If the RHS of an FP comparison is a constant, simplify it away in + // some cases. + if (CFP->getValueAPF().isNaN()) { + // If an operand is known to be a nan, we can fold it. + switch (ISD::getUnorderedFlavor(Cond)) { + default: llvm_unreachable("Unknown flavor!"); + case 0: // Known false. + return DAG.getConstant(0, dl, VT); + case 1: // Known true. + return DAG.getConstant(1, dl, VT); + case 2: // Undefined. + return DAG.getUNDEF(VT); + } + } + + // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the + // constant if knowing that the operand is non-nan is enough. We prefer to + // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to + // materialize 0.0. + if (Cond == ISD::SETO || Cond == ISD::SETUO) + return DAG.getSetCC(dl, VT, N0, N0, Cond); + + // If the condition is not legal, see if we can find an equivalent one + // which is legal. + if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { + // If the comparison was an awkward floating-point == or != and one of + // the comparison operands is infinity or negative infinity, convert the + // condition to a less-awkward <= or >=. 
+ if (CFP->getValueAPF().isInfinity()) { + if (CFP->getValueAPF().isNegative()) { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); + } else { + if (Cond == ISD::SETOEQ && + isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); + if (Cond == ISD::SETUEQ && + isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); + if (Cond == ISD::SETUNE && + isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); + if (Cond == ISD::SETONE && + isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) + return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); + } + } + } + } + + if (N0 == N1) { + // The sext(setcc()) => setcc() optimization relies on the appropriate + // constant being emitted. + uint64_t EqVal = 0; + switch (getBooleanContents(N0.getValueType())) { + case UndefinedBooleanContent: + case ZeroOrOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond); + break; + case ZeroOrNegativeOneBooleanContent: + EqVal = ISD::isTrueWhenEqual(Cond) ? -1 : 0; + break; + } + + // We can always fold X == X for integer setcc's. + if (N0.getValueType().isInteger()) { + return DAG.getConstant(EqVal, dl, VT); + } + unsigned UOF = ISD::getUnorderedFlavor(Cond); + if (UOF == 2) // FP operators that are undefined on NaNs. + return DAG.getConstant(EqVal, dl, VT); + if (UOF == unsigned(ISD::isTrueWhenEqual(Cond))) + return DAG.getConstant(EqVal, dl, VT); + // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO + // if it is not already. + ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; + if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || + getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) + return DAG.getSetCC(dl, VT, N0, N1, NewCond); + } + + if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + N0.getValueType().isInteger()) { + if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB || + N0.getOpcode() == ISD::XOR) { + // Simplify (X+Y) == (X+Z) --> Y == Z + if (N0.getOpcode() == N1.getOpcode()) { + if (N0.getOperand(0) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond); + if (N0.getOperand(1) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond); + if (DAG.isCommutativeBinOp(N0.getOpcode())) { + // If X op Y == Y op X, try other combinations. + if (N0.getOperand(0) == N1.getOperand(1)) + return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0), + Cond); + if (N0.getOperand(1) == N1.getOperand(0)) + return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1), + Cond); + } + } + + // If RHS is a legal immediate value for a compare instruction, we need + // to be careful about increasing register pressure needlessly. 
+ bool LegalRHSImm = false; + + if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) { + if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + // Turn (X+C1) == C2 --> X == C2-C1 + if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) { + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(RHSC->getAPIntValue()- + LHSR->getAPIntValue(), + dl, N0.getValueType()), Cond); + } + + // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0. + if (N0.getOpcode() == ISD::XOR) + // If we know that all of the inverted bits are zero, don't bother + // performing the inversion. + if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue())) + return + DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(LHSR->getAPIntValue() ^ + RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); + } + + // Turn (C1-X) == C2 --> X == C1-C2 + if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { + if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) { + return + DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(SUBC->getAPIntValue() - + RHSC->getAPIntValue(), + dl, N0.getValueType()), + Cond); + } + } + + // Could RHSC fold directly into a compare? + if (RHSC->getValueType(0).getSizeInBits() <= 64) + LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue()); + } + + // Simplify (X+Z) == X --> Z == 0 + // Don't do this if X is an immediate that can fold into a cmp + // instruction and X+Z has other uses. It could be an induction variable + // chain, and the transform would increase register pressure. + if (!LegalRHSImm || N0.getNode()->hasOneUse()) { + if (N0.getOperand(0) == N1) + return DAG.getSetCC(dl, VT, N0.getOperand(1), + DAG.getConstant(0, dl, N0.getValueType()), Cond); + if (N0.getOperand(1) == N1) { + if (DAG.isCommutativeBinOp(N0.getOpcode())) + return DAG.getSetCC(dl, VT, N0.getOperand(0), + DAG.getConstant(0, dl, N0.getValueType()), + Cond); + if (N0.getNode()->hasOneUse()) { + assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); + // (Z-X) == X --> Z == X<<1 + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N1, + DAG.getConstant(1, dl, + getShiftAmountTy(N1.getValueType(), DL))); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); + } + } + } + } + + if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || + N1.getOpcode() == ISD::XOR) { + // Simplify X == (X+Z) --> Z == 0 + if (N1.getOperand(0) == N0) + return DAG.getSetCC(dl, VT, N1.getOperand(1), + DAG.getConstant(0, dl, N1.getValueType()), Cond); + if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) + return DAG.getSetCC(dl, VT, N1.getOperand(0), + DAG.getConstant(0, dl, N1.getValueType()), Cond); + if (N1.getNode()->hasOneUse()) { + assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); + auto &DL = DAG.getDataLayout(); + // X == (Z-X) --> X<<1 == Z + SDValue SH = DAG.getNode( + ISD::SHL, dl, N1.getValueType(), N0, + DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL))); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(SH.getNode()); + return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); + } + } + } + + // Simplify x&y == y to x&y != 0 if y has exactly one bit set. + // Note that where y is variable and is known to have at most + // one bit set (for example, if it is z&1) we cannot do this; + // the expressions are not equivalent when y==0. 
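+    // Illustration only (constants are arbitrary samples): with y == 8 the
+    // two forms agree for every x, but with y == 0 the first form is always
+    // true while the second is always false, which is why y must be known to
+    // have exactly one bit set.
+    static_assert(((0x1Cu & 8u) == 8u) == ((0x1Cu & 8u) != 0u), "y = 8, bit set in x");
+    static_assert(((0x13u & 8u) == 8u) == ((0x13u & 8u) != 0u), "y = 8, bit clear in x");
+    static_assert(((0x13u & 0u) == 0u) != ((0x13u & 0u) != 0u), "y = 0: forms disagree");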
+ if (N0.getOpcode() == ISD::AND) + if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) { + if (ValueHasExactlyOneBitSet(N1, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N0.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, dl, N1.getValueType()); + return DAG.getSetCC(dl, VT, N0, Zero, Cond); + } + } + } + if (N1.getOpcode() == ISD::AND) + if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) { + if (ValueHasExactlyOneBitSet(N0, DAG)) { + Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true); + if (DCI.isBeforeLegalizeOps() || + isCondCodeLegal(Cond, N1.getSimpleValueType())) { + SDValue Zero = DAG.getConstant(0, dl, N0.getValueType()); + return DAG.getSetCC(dl, VT, N1, Zero, Cond); + } + } + } + } + + // Fold away ALL boolean setcc's. + SDValue Temp; + if (N0.getValueType() == MVT::i1 && foldBooleans) { + switch (Cond) { + default: llvm_unreachable("Unknown integer setcc!"); + case ISD::SETEQ: // X == Y -> ~(X^Y) + Temp = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + N0 = DAG.getNOT(dl, Temp, MVT::i1); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETNE: // X != Y --> (X^Y) + N0 = DAG.getNode(ISD::XOR, dl, MVT::i1, N0, N1); + break; + case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y + case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X + case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::AND, dl, MVT::i1, N0, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y + case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y + Temp = DAG.getNOT(dl, N0, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N1, Temp); + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(Temp.getNode()); + break; + case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X + case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X + Temp = DAG.getNOT(dl, N1, MVT::i1); + N0 = DAG.getNode(ISD::OR, dl, MVT::i1, N0, Temp); + break; + } + if (VT != MVT::i1) { + if (!DCI.isCalledByLegalizer()) + DCI.AddToWorklist(N0.getNode()); + // FIXME: If running after legalize, we probably can't do this. + N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0); + } + return N0; + } + + // Could not fold it. + return SDValue(); +} + +/// Returns true (and the GlobalValue and the offset) if the node is a +/// GlobalAddress + offset. 
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, + int64_t &Offset) const { + if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) { + GA = GASD->getGlobal(); + Offset += GASD->getOffset(); + return true; + } + + if (N->getOpcode() == ISD::ADD) { + SDValue N1 = N->getOperand(0); + SDValue N2 = N->getOperand(1); + if (isGAPlusOffset(N1.getNode(), GA, Offset)) { + if (auto *V = dyn_cast<ConstantSDNode>(N2)) { + Offset += V->getSExtValue(); + return true; + } + } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) { + if (auto *V = dyn_cast<ConstantSDNode>(N1)) { + Offset += V->getSExtValue(); + return true; + } + } + } + + return false; +} + +SDValue TargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + // Default implementation: no optimization. + return SDValue(); +} + +//===----------------------------------------------------------------------===// +// Inline Assembler Implementation Methods +//===----------------------------------------------------------------------===// + +TargetLowering::ConstraintType +TargetLowering::getConstraintType(StringRef Constraint) const { + unsigned S = Constraint.size(); + + if (S == 1) { + switch (Constraint[0]) { + default: break; + case 'r': return C_RegisterClass; + case 'm': // memory + case 'o': // offsetable + case 'V': // not offsetable + return C_Memory; + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 'E': // Floating Point Constant + case 'F': // Floating Point Constant + case 's': // Relocatable Constant + case 'p': // Address. + case 'X': // Allow ANY value. + case 'I': // Target registers. + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case '<': + case '>': + return C_Other; + } + } + + if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { + if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}" + return C_Memory; + return C_Register; + } + return C_Unknown; +} + +/// Try to replace an X constraint, which matches anything, with another that +/// has more specific requirements based on the type of the corresponding +/// operand. +const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ + if (ConstraintVT.isInteger()) + return "r"; + if (ConstraintVT.isFloatingPoint()) + return "f"; // works for many targets + return nullptr; +} + +/// Lower the specified operand into the Ops vector. +/// If it is invalid, don't add anything to Ops. +void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, + std::string &Constraint, + std::vector<SDValue> &Ops, + SelectionDAG &DAG) const { + + if (Constraint.length() > 1) return; + + char ConstraintLetter = Constraint[0]; + switch (ConstraintLetter) { + default: break; + case 'X': // Allows any operand; labels (basic block) use this. + if (Op.getOpcode() == ISD::BasicBlock) { + Ops.push_back(Op); + return; + } + // fall through + case 'i': // Simple Integer or Relocatable Constant + case 'n': // Simple Integer + case 's': { // Relocatable Constant + // These operands are interested in values of the form (GV+C), where C may + // be folded in as an offset of GV, or it may be explicitly added. Also, it + // is possible and fine if either GV or C are missing. 
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); + GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op); + + // If we have "(add GV, C)", pull out GV/C + if (Op.getOpcode() == ISD::ADD) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0)); + if (!C || !GA) { + C = dyn_cast<ConstantSDNode>(Op.getOperand(0)); + GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1)); + } + if (!C || !GA) + C = nullptr, GA = nullptr; + } + + // If we find a valid operand, map to the TargetXXX version so that the + // value itself doesn't get selected. + if (GA) { // Either &GV or &GV+C + if (ConstraintLetter != 'n') { + int64_t Offs = GA->getOffset(); + if (C) Offs += C->getZExtValue(); + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + C ? SDLoc(C) : SDLoc(), + Op.getValueType(), Offs)); + } + return; + } + if (C) { // just C, no GV. + // Simple constants are not allowed for 's'. + if (ConstraintLetter != 's') { + // gcc prints these as sign extended. Sign extend value to 64 bits + // now; without this it would get ZExt'd later in + // ScheduleDAGSDNodes::EmitNode, which is very generic. + Ops.push_back(DAG.getTargetConstant(C->getAPIntValue().getSExtValue(), + SDLoc(C), MVT::i64)); + } + return; + } + break; + } + } +} + +std::pair<unsigned, const TargetRegisterClass *> +TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI, + StringRef Constraint, + MVT VT) const { + if (Constraint.empty() || Constraint[0] != '{') + return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr)); + assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); + + // Remove the braces from around the name. + StringRef RegName(Constraint.data()+1, Constraint.size()-2); + + std::pair<unsigned, const TargetRegisterClass*> R = + std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr)); + + // Figure out which register class contains this reg. + for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), + E = RI->regclass_end(); RCI != E; ++RCI) { + const TargetRegisterClass *RC = *RCI; + + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + if (!isLegalRC(RC)) + continue; + + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) { + if (RegName.equals_lower(RI->getName(*I))) { + std::pair<unsigned, const TargetRegisterClass*> S = + std::make_pair(*I, RC); + + // If this register class has the requested value type, return it, + // otherwise keep searching and return the first class found + // if no other is found which explicitly has the requested type. + if (RC->hasType(VT)) + return S; + else if (!R.second) + R = S; + } + } + } + + return R; +} + +//===----------------------------------------------------------------------===// +// Constraint Selection. + +/// Return true of this is an input operand that is a matching constraint like +/// "4". +bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return isdigit(static_cast<unsigned char>(ConstraintCode[0])); +} + +/// If this is an input matching constraint, this method returns the output +/// operand it matches. 
+unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const { + assert(!ConstraintCode.empty() && "No known constraint!"); + return atoi(ConstraintCode.c_str()); +} + +/// Split up the constraint string from the inline assembly value into the +/// specific constraints and their prefixes, and also tie in the associated +/// operand values. +/// If this returns an empty vector, and if the constraint string itself +/// isn't empty, there was an error parsing. +TargetLowering::AsmOperandInfoVector +TargetLowering::ParseConstraints(const DataLayout &DL, + const TargetRegisterInfo *TRI, + ImmutableCallSite CS) const { + /// Information about all of the constraints. + AsmOperandInfoVector ConstraintOperands; + const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); + unsigned maCount = 0; // Largest number of multiple alternative constraints. + + // Do a prepass over the constraints, canonicalizing them, and building up the + // ConstraintOperands list. + unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. + unsigned ResNo = 0; // ResNo - The result number of the next output. + + for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) { + ConstraintOperands.emplace_back(std::move(CI)); + AsmOperandInfo &OpInfo = ConstraintOperands.back(); + + // Update multiple alternative constraint count. + if (OpInfo.multipleAlternatives.size() > maCount) + maCount = OpInfo.multipleAlternatives.size(); + + OpInfo.ConstraintVT = MVT::Other; + + // Compute the value type for each operand. + switch (OpInfo.Type) { + case InlineAsm::isOutput: + // Indirect outputs just consume an argument. + if (OpInfo.isIndirect) { + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + } + + // The return value of the call is this value. As such, there is no + // corresponding argument. + assert(!CS.getType()->isVoidTy() && + "Bad inline asm!"); + if (StructType *STy = dyn_cast<StructType>(CS.getType())) { + OpInfo.ConstraintVT = + getSimpleValueType(DL, STy->getElementType(ResNo)); + } else { + assert(ResNo == 0 && "Asm only has one result!"); + OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType()); + } + ++ResNo; + break; + case InlineAsm::isInput: + OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); + break; + case InlineAsm::isClobber: + // Nothing to do. + break; + } + + if (OpInfo.CallOperandVal) { + llvm::Type *OpTy = OpInfo.CallOperandVal->getType(); + if (OpInfo.isIndirect) { + llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); + if (!PtrTy) + report_fatal_error("Indirect operand for inline asm not a pointer!"); + OpTy = PtrTy->getElementType(); + } + + // Look for vector wrapped in a struct. e.g. { <16 x i8> }. + if (StructType *STy = dyn_cast<StructType>(OpTy)) + if (STy->getNumElements() == 1) + OpTy = STy->getElementType(0); + + // If OpTy is not a single value, it may be a struct/union that we + // can tile with integers. 
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) { + unsigned BitSize = DL.getTypeSizeInBits(OpTy); + switch (BitSize) { + default: break; + case 1: + case 8: + case 16: + case 32: + case 64: + case 128: + OpInfo.ConstraintVT = + MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); + break; + } + } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) { + unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace()); + OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize); + } else { + OpInfo.ConstraintVT = MVT::getVT(OpTy, true); + } + } + } + + // If we have multiple alternative constraints, select the best alternative. + if (!ConstraintOperands.empty()) { + if (maCount) { + unsigned bestMAIndex = 0; + int bestWeight = -1; + // weight: -1 = invalid match, and 0 = so-so match to 5 = good match. + int weight = -1; + unsigned maIndex; + // Compute the sums of the weights for each alternative, keeping track + // of the best (highest weight) one so far. + for (maIndex = 0; maIndex < maCount; ++maIndex) { + int weightSum = 0; + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + if (OpInfo.Type == InlineAsm::isClobber) + continue; + + // If this is an output operand with a matching input operand, + // look up the matching input. If their types mismatch, e.g. one + // is an integer, the other is floating point, or their sizes are + // different, flag it as an maCantMatch. + if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (OpInfo.ConstraintVT.getSizeInBits() != + Input.ConstraintVT.getSizeInBits())) { + weightSum = -1; // Can't match. + break; + } + } + } + weight = getMultipleConstraintMatchWeight(OpInfo, maIndex); + if (weight == -1) { + weightSum = -1; + break; + } + weightSum += weight; + } + // Update best. + if (weightSum > bestWeight) { + bestWeight = weightSum; + bestMAIndex = maIndex; + } + } + + // Now select chosen alternative in each constraint. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& cInfo = ConstraintOperands[cIndex]; + if (cInfo.Type == InlineAsm::isClobber) + continue; + cInfo.selectAlternative(bestMAIndex); + } + } + } + + // Check and hook up tied operands, choose constraint code to use. + for (unsigned cIndex = 0, eIndex = ConstraintOperands.size(); + cIndex != eIndex; ++cIndex) { + AsmOperandInfo& OpInfo = ConstraintOperands[cIndex]; + + // If this is an output operand with a matching input operand, look up the + // matching input. If their types mismatch, e.g. one is an integer, the + // other is floating point, or their sizes are different, flag it as an + // error. 
+ if (OpInfo.hasMatchingInput()) { + AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; + + if (OpInfo.ConstraintVT != Input.ConstraintVT) { + std::pair<unsigned, const TargetRegisterClass *> MatchRC = + getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, + OpInfo.ConstraintVT); + std::pair<unsigned, const TargetRegisterClass *> InputRC = + getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, + Input.ConstraintVT); + if ((OpInfo.ConstraintVT.isInteger() != + Input.ConstraintVT.isInteger()) || + (MatchRC.second != InputRC.second)) { + report_fatal_error("Unsupported asm: input constraint" + " with a matching output constraint of" + " incompatible type!"); + } + } + } + } + + return ConstraintOperands; +} + +/// Return an integer indicating how general CT is. +static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { + switch (CT) { + case TargetLowering::C_Other: + case TargetLowering::C_Unknown: + return 0; + case TargetLowering::C_Register: + return 1; + case TargetLowering::C_RegisterClass: + return 2; + case TargetLowering::C_Memory: + return 3; + } + llvm_unreachable("Invalid constraint type"); +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getMultipleConstraintMatchWeight( + AsmOperandInfo &info, int maIndex) const { + InlineAsm::ConstraintCodeVector *rCodes; + if (maIndex >= (int)info.multipleAlternatives.size()) + rCodes = &info.Codes; + else + rCodes = &info.multipleAlternatives[maIndex].Codes; + ConstraintWeight BestWeight = CW_Invalid; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = rCodes->size(); i != e; ++i) { + ConstraintWeight weight = + getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str()); + if (weight > BestWeight) + BestWeight = weight; + } + + return BestWeight; +} + +/// Examine constraint type and operand type and determine a weight value. +/// This object must already have been set up with the operand type +/// and the current alternative constraint selected. +TargetLowering::ConstraintWeight + TargetLowering::getSingleConstraintMatchWeight( + AsmOperandInfo &info, const char *constraint) const { + ConstraintWeight weight = CW_Invalid; + Value *CallOperandVal = info.CallOperandVal; + // If we don't have a value, we can't do a match, + // but allow it at the lowest weight. + if (!CallOperandVal) + return CW_Default; + // Look at the constraint type. + switch (*constraint) { + case 'i': // immediate integer. + case 'n': // immediate integer with a known value. + if (isa<ConstantInt>(CallOperandVal)) + weight = CW_Constant; + break; + case 's': // non-explicit intregal immediate. + if (isa<GlobalValue>(CallOperandVal)) + weight = CW_Constant; + break; + case 'E': // immediate float if host format. + case 'F': // immediate float. + if (isa<ConstantFP>(CallOperandVal)) + weight = CW_Constant; + break; + case '<': // memory operand with autodecrement. + case '>': // memory operand with autoincrement. + case 'm': // memory operand. + case 'o': // offsettable memory operand + case 'V': // non-offsettable memory operand + weight = CW_Memory; + break; + case 'r': // general register. + case 'g': // general register, memory operand or immediate integer. + // note: Clang converts "g" to "imr". 
+ if (CallOperandVal->getType()->isIntegerTy()) + weight = CW_Register; + break; + case 'X': // any operand. + default: + weight = CW_Default; + break; + } + return weight; +} + +/// If there are multiple different constraints that we could pick for this +/// operand (e.g. "imr") try to pick the 'best' one. +/// This is somewhat tricky: constraints fall into four classes: +/// Other -> immediates and magic values +/// Register -> one specific register +/// RegisterClass -> a group of regs +/// Memory -> memory +/// Ideally, we would pick the most specific constraint possible: if we have +/// something that fits into a register, we would pick it. The problem here +/// is that if we have something that could either be in a register or in +/// memory that use of the register could cause selection of *other* +/// operands to fail: they might only succeed if we pick memory. Because of +/// this the heuristic we use is: +/// +/// 1) If there is an 'other' constraint, and if the operand is valid for +/// that constraint, use it. This makes us take advantage of 'i' +/// constraints when available. +/// 2) Otherwise, pick the most general constraint present. This prefers +/// 'm' over 'r', for example. +/// +static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, + const TargetLowering &TLI, + SDValue Op, SelectionDAG *DAG) { + assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); + unsigned BestIdx = 0; + TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; + int BestGenerality = -1; + + // Loop over the options, keeping track of the most general one. + for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { + TargetLowering::ConstraintType CType = + TLI.getConstraintType(OpInfo.Codes[i]); + + // If this is an 'other' constraint, see if the operand is valid for it. + // For example, on X86 we might have an 'rI' constraint. If the operand + // is an integer in the range [0..31] we want to use I (saving a load + // of a register), otherwise we must use 'r'. + if (CType == TargetLowering::C_Other && Op.getNode()) { + assert(OpInfo.Codes[i].size() == 1 && + "Unhandled multi-letter 'other' constraint"); + std::vector<SDValue> ResultOps; + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i], + ResultOps, *DAG); + if (!ResultOps.empty()) { + BestType = CType; + BestIdx = i; + break; + } + } + + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + + // This constraint letter is more general than the previous one, use it. + int Generality = getConstraintGenerality(CType); + if (Generality > BestGenerality) { + BestType = CType; + BestIdx = i; + BestGenerality = Generality; + } + } + + OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; + OpInfo.ConstraintType = BestType; +} + +/// Determines the constraint code and constraint type to use for the specific +/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. +void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, + SDValue Op, + SelectionDAG *DAG) const { + assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); + + // Single-letter constraints ('r') are very common. + if (OpInfo.Codes.size() == 1) { + OpInfo.ConstraintCode = OpInfo.Codes[0]; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } else { + ChooseConstraint(OpInfo, *this, Op, DAG); + } + + // 'X' matches anything. 
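+  // Note (illustrative): for a plain scalar integer operand, the default
+  // LowerXConstraint() consulted below typically rewrites "X" into "r" so a
+  // concrete register class can be selected; targets may override this hook.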
+ if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { + // Labels and constants are handled elsewhere ('X' is the only thing + // that matches labels). For Functions, the type here is the type of + // the result, which is not what we want to look at; leave them alone. + Value *v = OpInfo.CallOperandVal; + if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) { + OpInfo.CallOperandVal = v; + return; + } + + // Otherwise, try to resolve it to something we know about by looking at + // the actual operand type. + if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { + OpInfo.ConstraintCode = Repl; + OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); + } + } +} + +/// \brief Given an exact SDIV by a constant, create a multiplication +/// with the multiplicative inverse of the constant. +static SDValue BuildExactSDIV(const TargetLowering &TLI, SDValue Op1, APInt d, + SDLoc dl, SelectionDAG &DAG, + std::vector<SDNode *> &Created) { + assert(d != 0 && "Division by zero!"); + + // Shift the value upfront if it is even, so the LSB is one. + unsigned ShAmt = d.countTrailingZeros(); + if (ShAmt) { + // TODO: For UDIV use SRL instead of SRA. + SDValue Amt = + DAG.getConstant(ShAmt, dl, TLI.getShiftAmountTy(Op1.getValueType(), + DAG.getDataLayout())); + SDNodeFlags Flags; + Flags.setExact(true); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, &Flags); + Created.push_back(Op1.getNode()); + d = d.ashr(ShAmt); + } + + // Calculate the multiplicative inverse, using Newton's method. + APInt t, xn = d; + while ((t = d*xn) != 1) + xn *= APInt(d.getBitWidth(), 2) - t; + + SDValue Op2 = DAG.getConstant(xn, dl, Op1.getValueType()); + SDValue Mul = DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2); + Created.push_back(Mul.getNode()); + return Mul; +} + +SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, + std::vector<SDNode *> *Created) const { + AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isIntDivCheap(N->getValueType(0), Attr)) + return SDValue(N,0); // Lower SDIV as SDIV + return SDValue(); +} + +/// \brief Given an ISD::SDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildSDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold sdiv ops."); + + EVT VT = N->getValueType(0); + SDLoc dl(N); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + // If the sdiv has an 'exact' bit we can use a simpler lowering. + if (cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact()) + return BuildExactSDIV(*this, N->getOperand(0), Divisor, dl, DAG, *Created); + + APInt::ms magics = Divisor.magic(); + + // Multiply the numerator (operand 0) by the magic value + // FIXME: We should support doing a MUL in a wider type + SDValue Q; + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) + Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), + DAG.getConstant(magics.m, dl, VT)); + else if (IsAfterLegalization ? 
isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), + N->getOperand(0), + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); + else + return SDValue(); // No mulhs or equvialent + // If d > 0 and m < 0, add the numerator + if (Divisor.isStrictlyPositive() && magics.m.isNegative()) { + Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0)); + Created->push_back(Q.getNode()); + } + // If d < 0 and m > 0, subtract the numerator. + if (Divisor.isNegative() && magics.m.isStrictlyPositive()) { + Q = DAG.getNode(ISD::SUB, dl, VT, Q, N->getOperand(0)); + Created->push_back(Q.getNode()); + } + auto &DL = DAG.getDataLayout(); + // Shift right algebraic if shift value is nonzero + if (magics.s > 0) { + Q = DAG.getNode( + ISD::SRA, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); + Created->push_back(Q.getNode()); + } + // Extract the sign bit and add it to the quotient + SDValue T = + DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, + getShiftAmountTy(Q.getValueType(), DL))); + Created->push_back(T.getNode()); + return DAG.getNode(ISD::ADD, dl, VT, Q, T); +} + +/// \brief Given an ISD::UDIV node expressing a divide by constant, +/// return a DAG expression to select that will generate the same value by +/// multiplying by a magic number. +/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide". +SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor, + SelectionDAG &DAG, bool IsAfterLegalization, + std::vector<SDNode *> *Created) const { + assert(Created && "No vector to hold udiv ops."); + + EVT VT = N->getValueType(0); + SDLoc dl(N); + auto &DL = DAG.getDataLayout(); + + // Check to see if we can do this. + // FIXME: We should be more aggressive here. + if (!isTypeLegal(VT)) + return SDValue(); + + // FIXME: We should use a narrower constant when the upper + // bits are known to be zero. + APInt::mu magics = Divisor.magicu(); + + SDValue Q = N->getOperand(0); + + // If the divisor is even, we can avoid using the expensive fixup by shifting + // the divided value upfront. + if (magics.a != 0 && !Divisor[0]) { + unsigned Shift = Divisor.countTrailingZeros(); + Q = DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, dl, getShiftAmountTy(Q.getValueType(), DL))); + Created->push_back(Q.getNode()); + + // Get magic number for the shifted divisor. + magics = Divisor.lshr(Shift).magicu(Shift); + assert(magics.a == 0 && "Should use cheap fixup now"); + } + + // Multiply the numerator (operand 0) by the magic value + // FIXME: We should support doing a MUL in a wider type + if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, dl, VT)); + else if (IsAfterLegalization ? 
isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, + DAG.getConstant(magics.m, dl, VT)).getNode(), 1); + else + return SDValue(); // No mulhu or equvialent + + Created->push_back(Q.getNode()); + + if (magics.a == 0) { + assert(magics.s < Divisor.getBitWidth() && + "We shouldn't generate an undefined shift!"); + return DAG.getNode( + ISD::SRL, dl, VT, Q, + DAG.getConstant(magics.s, dl, getShiftAmountTy(Q.getValueType(), DL))); + } else { + SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); + Created->push_back(NPQ.getNode()); + NPQ = DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(1, dl, getShiftAmountTy(NPQ.getValueType(), DL))); + Created->push_back(NPQ.getNode()); + NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); + Created->push_back(NPQ.getNode()); + return DAG.getNode( + ISD::SRL, dl, VT, NPQ, + DAG.getConstant(magics.s - 1, dl, + getShiftAmountTy(NPQ.getValueType(), DL))); + } +} + +bool TargetLowering:: +verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const { + if (!isa<ConstantSDNode>(Op.getOperand(0))) { + DAG.getContext()->emitError("argument to '__builtin_return_address' must " + "be a constant integer"); + return true; + } + + return false; +} + +//===----------------------------------------------------------------------===// +// Legalization Utilities +//===----------------------------------------------------------------------===// + +bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, + SelectionDAG &DAG, SDValue LL, SDValue LH, + SDValue RL, SDValue RH) const { + EVT VT = N->getValueType(0); + SDLoc dl(N); + + bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT); + bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT); + bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT); + bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT); + if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) { + unsigned OuterBitSize = VT.getSizeInBits(); + unsigned InnerBitSize = HiLoVT.getSizeInBits(); + unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0)); + unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1)); + + // LL, LH, RL, and RH must be either all NULL or all set to a value. + assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) || + (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode())); + + if (!LL.getNode() && !RL.getNode() && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0)); + RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1)); + } + + if (!LL.getNode()) + return false; + + APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize); + if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) && + DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) { + // The inputs are both zero-extended. + if (HasUMUL_LOHI) { + // We can emit a umul_lohi. + Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHU) { + // We can emit a mulhu+mul. + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + return true; + } + } + if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) { + // The input values are both sign-extended. + if (HasSMUL_LOHI) { + // We can emit a smul_lohi. 
+ Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL, + RL); + Hi = SDValue(Lo.getNode(), 1); + return true; + } + if (HasMULHS) { + // We can emit a mulhs+mul. + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL); + return true; + } + } + + if (!LH.getNode() && !RH.getNode() && + isOperationLegalOrCustom(ISD::SRL, VT) && + isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) { + auto &DL = DAG.getDataLayout(); + unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits(); + SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL)); + LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift); + LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH); + RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift); + RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH); + } + + if (!LH.getNode()) + return false; + + if (HasUMUL_LOHI) { + // Lo,Hi = umul LHS, RHS. + SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl, + DAG.getVTList(HiLoVT, HiLoVT), LL, RL); + Lo = UMulLOHI; + Hi = UMulLOHI.getValue(1); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + if (HasMULHU) { + Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL); + Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL); + RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH); + LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH); + Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH); + return true; + } + } + return false; +} + +bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result, + SelectionDAG &DAG) const { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + SDLoc dl(SDValue(Node, 0)); + + // FIXME: Only f32 to i64 conversions are supported. 
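+  // Worked example of the expansion below (illustrative values): 100.5f has
+  // the bit pattern 0x42C90000, i.e. biased exponent 133 (unbiased 6) and
+  // mantissa 0x490000. ORing in the implicit bit yields 0x00C90000; because
+  // the unbiased exponent (6) is below the mantissa width (23), the value is
+  // shifted right by 23 - 6 = 17, giving 100. The sign is then applied via
+  // (R ^ Sign) - Sign, and negative unbiased exponents (|x| < 1) select 0.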
+ if (VT != MVT::f32 || NVT != MVT::i64) + return false; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT); + SDValue Bias = DAG.getConstant(127, dl, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), dl, + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + auto &DL = DAG.getDataLayout(); + SDValue ExponentBits = DAG.getNode( + ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode( + ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, dl, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + R = DAG.getSelectCC( + dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, getShiftAmountTy(IntVT, DL))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, getShiftAmountTy(IntVT, DL))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT), + DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT); + return true; +} + +//===----------------------------------------------------------------------===// +// Implementation of Emulated TLS Model +//===----------------------------------------------------------------------===// + +SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, + SelectionDAG &DAG) const { + // Access to address of TLS varialbe xyz is lowered to a function call: + // __emutls_get_address( address of global variable named "__emutls_v.xyz" ) + EVT PtrVT = getPointerTy(DAG.getDataLayout()); + PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext()); + SDLoc dl(GA); + + ArgListTy Args; + ArgListEntry Entry; + std::string NameString = ("__emutls_v." 
+                                GA->getGlobal()->getName()).str();
+  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
+  StringRef EmuTlsVarName(NameString);
+  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
+  if (!EmuTlsVar)
+    EmuTlsVar = dyn_cast_or_null<GlobalVariable>(
+        VariableModule->getOrInsertGlobal(EmuTlsVarName, VoidPtrType));
+  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
+  Entry.Ty = VoidPtrType;
+  Args.push_back(Entry);
+
+  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
+  CLI.setCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args), 0);
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+
+  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has
+  // calls. At least for X86 targets; this may be useful for other targets too.
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setAdjustsStack(true);  // Is this only needed for X86 targets?
+  MFI->setHasCalls(true);
+
+  assert((GA->getOffset() == 0) &&
+         "Emulated TLS must have zero offset in GlobalAddressSDNode");
+  return CallResult.first;
+}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
new file mode 100644
index 0000000..00db942
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp
@@ -0,0 +1,19 @@
+//===-- TargetSelectionDAGInfo.cpp - SelectionDAG Info --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements the TargetSelectionDAGInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetSelectionDAGInfo::~TargetSelectionDAGInfo() {
+}
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
new file mode 100644
index 0000000..b12e943
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGC.cpp
@@ -0,0 +1,55 @@
+//===-- ShadowStackGC.cpp - GC support for uncooperative targets ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics for targets that
+// do not natively support them (which includes the C backend). Note that the
+// code generated is not quite as efficient as algorithms which generate stack
+// maps to identify roots.
+//
+// This pass implements the code transformation described in this paper:
+//   "Accurate Garbage Collection in an Uncooperative Environment"
+//   Fergus Henderson, ISMM, 2002
+//
+// runtime/GC/SemiSpace.cpp contains a prototype runtime that is compatible
+// with ShadowStackGC.
+//
+// In order to support this particular transformation, all stack roots are
+// co-allocated on the stack. This allows a fully target-independent stack map
+// while introducing only minor runtime overhead.
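+//
+// Conceptually (an illustrative sketch; the types are built by the lowering
+// pass, not emitted verbatim), the runtime structures look like:
+//
+//   struct FrameMap   { int32_t NumRoots; int32_t NumMeta; void *Meta[]; };
+//   struct StackEntry { StackEntry *Next; const FrameMap *Map; void *Roots[]; };
+//
+// with a global llvm_gc_root_chain pointing at the innermost StackEntry.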
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define DEBUG_TYPE "shadowstackgc" + +namespace { +class ShadowStackGC : public GCStrategy { +public: + ShadowStackGC(); +}; +} + +static GCRegistry::Add<ShadowStackGC> + X("shadow-stack", "Very portable GC for uncooperative code generators"); + +void llvm::linkShadowStackGC() {} + +ShadowStackGC::ShadowStackGC() { + InitRoots = true; + CustomRoots = true; +} diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp new file mode 100644 index 0000000..878eeee --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp @@ -0,0 +1,464 @@ +//===-- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the custom lowering code required by the shadow-stack GC +// strategy. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" + +using namespace llvm; + +#define DEBUG_TYPE "shadowstackgclowering" + +namespace { + +class ShadowStackGCLowering : public FunctionPass { + /// RootChain - This is the global linked-list that contains the chain of GC + /// roots. + GlobalVariable *Head; + + /// StackEntryTy - Abstract type of a link in the shadow stack. + /// + StructType *StackEntryTy; + StructType *FrameMapTy; + + /// Roots - GC roots in the current function. Each is a pair of the + /// intrinsic call and its corresponding alloca. 
+ std::vector<std::pair<CallInst *, AllocaInst *>> Roots; + +public: + static char ID; + ShadowStackGCLowering(); + + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + +private: + bool IsNullValue(Value *V); + Constant *GetFrameMap(Function &F); + Type *GetConcreteStackEntryType(Function &F); + void CollectRoots(Function &F); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, + const char *Name); + static GetElementPtrInst *CreateGEP(LLVMContext &Context, IRBuilder<> &B, + Type *Ty, Value *BasePtr, int Idx1, int Idx2, + const char *Name); +}; +} + +INITIALIZE_PASS_BEGIN(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) +INITIALIZE_PASS_DEPENDENCY(GCModuleInfo) +INITIALIZE_PASS_END(ShadowStackGCLowering, "shadow-stack-gc-lowering", + "Shadow Stack GC Lowering", false, false) + +FunctionPass *llvm::createShadowStackGCLoweringPass() { return new ShadowStackGCLowering(); } + +char ShadowStackGCLowering::ID = 0; + +ShadowStackGCLowering::ShadowStackGCLowering() + : FunctionPass(ID), Head(nullptr), StackEntryTy(nullptr), + FrameMapTy(nullptr) { + initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry()); +} + +namespace { +/// EscapeEnumerator - This is a little algorithm to find all escape points +/// from a function so that "finally"-style code can be inserted. In addition +/// to finding the existing return and unwind instructions, it also (if +/// necessary) transforms any call instructions into invokes and sends them to +/// a landing pad. +/// +/// It's wrapped up in a state machine using the same transform C# uses for +/// 'yield return' enumerators, This transform allows it to be non-allocating. +class EscapeEnumerator { + Function &F; + const char *CleanupBBName; + + // State. + int State; + Function::iterator StateBB, StateE; + IRBuilder<> Builder; + +public: + EscapeEnumerator(Function &F, const char *N = "cleanup") + : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {} + + IRBuilder<> *Next() { + switch (State) { + default: + return nullptr; + + case 0: + StateBB = F.begin(); + StateE = F.end(); + State = 1; + + case 1: + // Find all 'return', 'resume', and 'unwind' instructions. + while (StateBB != StateE) { + BasicBlock *CurBB = &*StateBB++; + + // Branches and invokes do not escape, only unwind, resume, and return + // do. + TerminatorInst *TI = CurBB->getTerminator(); + if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI)) + continue; + + Builder.SetInsertPoint(TI); + return &Builder; + } + + State = 2; + + // Find all 'call' instructions. + SmallVector<Instruction *, 16> Calls; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE; + ++II) + if (CallInst *CI = dyn_cast<CallInst>(II)) + if (!CI->getCalledFunction() || + !CI->getCalledFunction()->getIntrinsicID()) + Calls.push_back(CI); + + if (Calls.empty()) + return nullptr; + + // Create a cleanup block. 
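+      // The block created below has roughly this shape (illustrative; the
+      // actual label comes from CleanupBBName):
+      //   cleanup:
+      //     %lpad = landingpad { i8*, i32 } cleanup
+      //     resume { i8*, i32 } %lpad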
+ LLVMContext &C = F.getContext(); + BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F); + Type *ExnTy = + StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr); + if (!F.hasPersonalityFn()) { + Constant *PersFn = F.getParent()->getOrInsertFunction( + "__gcc_personality_v0", + FunctionType::get(Type::getInt32Ty(C), true)); + F.setPersonalityFn(PersFn); + } + LandingPadInst *LPad = + LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB); + LPad->setCleanup(true); + ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB); + + // Transform the 'call' instructions into 'invoke's branching to the + // cleanup block. Go in reverse order to make prettier BB names. + SmallVector<Value *, 16> Args; + for (unsigned I = Calls.size(); I != 0;) { + CallInst *CI = cast<CallInst>(Calls[--I]); + + // Split the basic block containing the function call. + BasicBlock *CallBB = CI->getParent(); + BasicBlock *NewBB = CallBB->splitBasicBlock( + CI->getIterator(), CallBB->getName() + ".cont"); + + // Remove the unconditional branch inserted at the end of CallBB. + CallBB->getInstList().pop_back(); + NewBB->getInstList().remove(CI); + + // Create a new invoke instruction. + Args.clear(); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); + + InvokeInst *II = + InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args, + CI->getName(), CallBB); + II->setCallingConv(CI->getCallingConv()); + II->setAttributes(CI->getAttributes()); + CI->replaceAllUsesWith(II); + delete CI; + } + + Builder.SetInsertPoint(RI); + return &Builder; + } + } +}; +} + + +Constant *ShadowStackGCLowering::GetFrameMap(Function &F) { + // doInitialization creates the abstract type of this value. + Type *VoidPtr = Type::getInt8PtrTy(F.getContext()); + + // Truncate the ShadowStackDescriptor if some metadata is null. + unsigned NumMeta = 0; + SmallVector<Constant *, 16> Metadata; + for (unsigned I = 0; I != Roots.size(); ++I) { + Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1)); + if (!C->isNullValue()) + NumMeta = I + 1; + Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); + } + Metadata.resize(NumMeta); + + Type *Int32Ty = Type::getInt32Ty(F.getContext()); + + Constant *BaseElts[] = { + ConstantInt::get(Int32Ty, Roots.size(), false), + ConstantInt::get(Int32Ty, NumMeta, false), + }; + + Constant *DescriptorElts[] = { + ConstantStruct::get(FrameMapTy, BaseElts), + ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)}; + + Type *EltTys[] = {DescriptorElts[0]->getType(), DescriptorElts[1]->getType()}; + StructType *STy = StructType::create(EltTys, "gc_map." + utostr(NumMeta)); + + Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts); + + // FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems + // that, short of multithreaded LLVM, it should be safe; all that is + // necessary is that a simple Module::iterator loop not be invalidated. + // Appending to the GlobalVariable list is safe in that sense. + // + // All of the output passes emit globals last. The ExecutionEngine + // explicitly supports adding globals to the module after + // initialization. + // + // Still, if it isn't deemed acceptable, then this transformation needs + // to be a ModulePass (which means it cannot be in the 'llc' pipeline + // (which uses a FunctionPassManager (which segfaults (not asserts) if + // provided a ModulePass))). 
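+  // For a function "foo" with two roots and no metadata, the global emitted
+  // below looks roughly like (illustrative; the struct types are the named
+  // gc_map types created above):
+  //   @__gc_foo = internal constant { { i32, i32 }, [0 x i8*] }
+  //               { { i32, i32 } { i32 2, i32 0 }, [0 x i8*] zeroinitializer }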
+ Constant *GV = new GlobalVariable(*F.getParent(), FrameMap->getType(), true, + GlobalVariable::InternalLinkage, FrameMap, + "__gc_" + F.getName()); + + Constant *GEPIndices[2] = { + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0), + ConstantInt::get(Type::getInt32Ty(F.getContext()), 0)}; + return ConstantExpr::getGetElementPtr(FrameMap->getType(), GV, GEPIndices); +} + +Type *ShadowStackGCLowering::GetConcreteStackEntryType(Function &F) { + // doInitialization creates the generic version of this type. + std::vector<Type *> EltTys; + EltTys.push_back(StackEntryTy); + for (size_t I = 0; I != Roots.size(); I++) + EltTys.push_back(Roots[I].second->getAllocatedType()); + + return StructType::create(EltTys, ("gc_stackentry." + F.getName()).str()); +} + +/// doInitialization - If this module uses the GC intrinsics, find them now. If +/// not, exit fast. +bool ShadowStackGCLowering::doInitialization(Module &M) { + bool Active = false; + for (Function &F : M) { + if (F.hasGC() && F.getGC() == std::string("shadow-stack")) { + Active = true; + break; + } + } + if (!Active) + return false; + + // struct FrameMap { + // int32_t NumRoots; // Number of roots in stack frame. + // int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots. + // void *Meta[]; // May be absent for roots without metadata. + // }; + std::vector<Type *> EltTys; + // 32 bits is ok up to a 32GB stack frame. :) + EltTys.push_back(Type::getInt32Ty(M.getContext())); + // Specifies length of variable length array. + EltTys.push_back(Type::getInt32Ty(M.getContext())); + FrameMapTy = StructType::create(EltTys, "gc_map"); + PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy); + + // struct StackEntry { + // ShadowStackEntry *Next; // Caller's stack entry. + // FrameMap *Map; // Pointer to constant FrameMap. + // void *Roots[]; // Stack roots (in-place array, so we pretend). + // }; + + StackEntryTy = StructType::create(M.getContext(), "gc_stackentry"); + + EltTys.clear(); + EltTys.push_back(PointerType::getUnqual(StackEntryTy)); + EltTys.push_back(FrameMapPtrTy); + StackEntryTy->setBody(EltTys); + PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy); + + // Get the root chain if it already exists. + Head = M.getGlobalVariable("llvm_gc_root_chain"); + if (!Head) { + // If the root chain does not exist, insert a new one with linkonce + // linkage! + Head = new GlobalVariable( + M, StackEntryPtrTy, false, GlobalValue::LinkOnceAnyLinkage, + Constant::getNullValue(StackEntryPtrTy), "llvm_gc_root_chain"); + } else if (Head->hasExternalLinkage() && Head->isDeclaration()) { + Head->setInitializer(Constant::getNullValue(StackEntryPtrTy)); + Head->setLinkage(GlobalValue::LinkOnceAnyLinkage); + } + + return true; +} + +bool ShadowStackGCLowering::IsNullValue(Value *V) { + if (Constant *C = dyn_cast<Constant>(V)) + return C->isNullValue(); + return false; +} + +void ShadowStackGCLowering::CollectRoots(Function &F) { + // FIXME: Account for original alignment. Could fragment the root array. + // Approach 1: Null initialize empty slots at runtime. Yuck. + // Approach 2: Emit a map of the array instead of just a count. 
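+  //
+  // We are scanning for calls of the form (illustrative):
+  //   call void @llvm.gcroot(i8** %root.slot, i8* <metadata or null>)
+  // where the first argument is the (bitcast) alloca for the root and the
+  // second is the optional metadata pointer.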
+ + assert(Roots.empty() && "Not cleaned up?"); + + SmallVector<std::pair<CallInst *, AllocaInst *>, 16> MetaRoots; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) + if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(II++)) + if (Function *F = CI->getCalledFunction()) + if (F->getIntrinsicID() == Intrinsic::gcroot) { + std::pair<CallInst *, AllocaInst *> Pair = std::make_pair( + CI, + cast<AllocaInst>(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) + Roots.push_back(Pair); + else + MetaRoots.push_back(Pair); + } + + // Number roots with metadata (usually empty) at the beginning, so that the + // FrameMap::Meta array can be elided. + Roots.insert(Roots.begin(), MetaRoots.begin(), MetaRoots.end()); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, + Value *BasePtr, int Idx, + int Idx2, + const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx), + ConstantInt::get(Type::getInt32Ty(Context), Idx2)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +GetElementPtrInst *ShadowStackGCLowering::CreateGEP(LLVMContext &Context, + IRBuilder<> &B, Type *Ty, Value *BasePtr, + int Idx, const char *Name) { + Value *Indices[] = {ConstantInt::get(Type::getInt32Ty(Context), 0), + ConstantInt::get(Type::getInt32Ty(Context), Idx)}; + Value *Val = B.CreateGEP(Ty, BasePtr, Indices, Name); + + assert(isa<GetElementPtrInst>(Val) && "Unexpected folded constant"); + + return dyn_cast<GetElementPtrInst>(Val); +} + +/// runOnFunction - Insert code to maintain the shadow stack. +bool ShadowStackGCLowering::runOnFunction(Function &F) { + // Quick exit for functions that do not use the shadow stack GC. + if (!F.hasGC() || + F.getGC() != std::string("shadow-stack")) + return false; + + LLVMContext &Context = F.getContext(); + + // Find calls to llvm.gcroot. + CollectRoots(F); + + // If there are no roots in this function, then there is no need to add a + // stack map entry for it. + if (Roots.empty()) + return false; + + // Build the constant map and figure the type of the shadow stack entry. + Value *FrameMap = GetFrameMap(F); + Type *ConcreteStackEntryTy = GetConcreteStackEntryType(F); + + // Build the shadow stack entry at the very start of the function. + BasicBlock::iterator IP = F.getEntryBlock().begin(); + IRBuilder<> AtEntry(IP->getParent(), IP); + + Instruction *StackEntry = + AtEntry.CreateAlloca(ConcreteStackEntryTy, nullptr, "gc_frame"); + + while (isa<AllocaInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Initialize the map pointer and load the current head of the shadow stack. + Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead"); + Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 1, "gc_frame.map"); + AtEntry.CreateStore(FrameMap, EntryMapPtr); + + // After all the allocas... + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + // For each root, find the corresponding slot in the aggregate... + Value *SlotPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 1 + I, "gc_root"); + + // And use it in lieu of the alloca. 
+ AllocaInst *OriginalAlloca = Roots[I].second; + SlotPtr->takeName(OriginalAlloca); + OriginalAlloca->replaceAllUsesWith(SlotPtr); + } + + // Move past the original stores inserted by GCStrategy::InitRoots. This isn't + // really necessary (the collector would never see the intermediate state at + // runtime), but it's nicer not to push the half-initialized entry onto the + // shadow stack. + while (isa<StoreInst>(IP)) + ++IP; + AtEntry.SetInsertPoint(IP->getParent(), IP); + + // Push the entry onto the shadow stack. + Instruction *EntryNextPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, 0, "gc_frame.next"); + Instruction *NewHeadVal = CreateGEP(Context, AtEntry, ConcreteStackEntryTy, + StackEntry, 0, "gc_newhead"); + AtEntry.CreateStore(CurrentHead, EntryNextPtr); + AtEntry.CreateStore(NewHeadVal, Head); + + // For each instruction that escapes... + EscapeEnumerator EE(F, "gc_cleanup"); + while (IRBuilder<> *AtExit = EE.Next()) { + // Pop the entry from the shadow stack. Don't reuse CurrentHead from + // AtEntry, since that would make the value live for the entire function. + Instruction *EntryNextPtr2 = + CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0, + "gc_frame.next"); + Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead"); + AtExit->CreateStore(SavedHead, Head); + } + + // Delete the original allocas (which are no longer used) and the intrinsic + // calls (which are no longer valid). Doing this last avoids invalidating + // iterators. + for (unsigned I = 0, E = Roots.size(); I != E; ++I) { + Roots[I].first->eraseFromParent(); + Roots[I].second->eraseFromParent(); + } + + Roots.clear(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp new file mode 100644 index 0000000..f8aa1e2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -0,0 +1,511 @@ +//===-- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass looks for safe point where the prologue and epilogue can be +// inserted. +// The safe point for the prologue (resp. epilogue) is called Save +// (resp. Restore). +// A point is safe for prologue (resp. epilogue) if and only if +// it 1) dominates (resp. post-dominates) all the frame related operations and +// between 2) two executions of the Save (resp. Restore) point there is an +// execution of the Restore (resp. Save) point. +// +// For instance, the following points are safe: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Restore -> Save -> Restore ... +// And the following points are not: +// for (int i = 0; i < 10; ++i) { +// Save +// ... +// } +// for (int i = 0; i < 10; ++i) { +// ... +// Restore +// } +// Indeed, the execution looks like Save -> Save -> ... -> Restore -> Restore. +// +// This pass also ensures that the safe points are 3) cheaper than the regular +// entry and exits blocks. +// +// Property #1 is ensured via the use of MachineDominatorTree and +// MachinePostDominatorTree. +// Property #2 is ensured via property #1 and MachineLoopInfo, i.e., both +// points must be in the same loop. +// Property #3 is ensured via the MachineBlockFrequencyInfo. 
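+// (Concretely, a candidate Save or Restore block must not be executed more
+// frequently than the entry block.)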
+// +// If this pass found points matching all these properties, then +// MachineFrameInfo is updated with this information. +//===----------------------------------------------------------------------===// +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +// To check for profitability. +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +// For property #1 for Save. +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +// To record the result of the analysis. +#include "llvm/CodeGen/MachineFrameInfo.h" +// For property #2. +#include "llvm/CodeGen/MachineLoopInfo.h" +// For property #1 for Restore. +#include "llvm/CodeGen/MachinePostDominators.h" +#include "llvm/CodeGen/Passes.h" +// To know about callee-saved. +#include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/Debug.h" +// To query the target about frame lowering. +#include "llvm/Target/TargetFrameLowering.h" +// To know about frame setup operation. +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +// To access TargetInstrInfo. +#include "llvm/Target/TargetSubtargetInfo.h" + +#define DEBUG_TYPE "shrink-wrap" + +using namespace llvm; + +STATISTIC(NumFunc, "Number of functions"); +STATISTIC(NumCandidates, "Number of shrink-wrapping candidates"); +STATISTIC(NumCandidatesDropped, + "Number of shrink-wrapping candidates dropped because of frequency"); + +static cl::opt<cl::boolOrDefault> + EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, + cl::desc("enable the shrink-wrapping pass")); + +namespace { +/// \brief Class to determine where the safe point to insert the +/// prologue and epilogue are. +/// Unlike the paper from Fred C. Chow, PLDI'88, that introduces the +/// shrink-wrapping term for prologue/epilogue placement, this pass +/// does not rely on expensive data-flow analysis. Instead we use the +/// dominance properties and loop information to decide which point +/// are safe for such insertion. +class ShrinkWrap : public MachineFunctionPass { + /// Hold callee-saved information. + RegisterClassInfo RCI; + MachineDominatorTree *MDT; + MachinePostDominatorTree *MPDT; + /// Current safe point found for the prologue. + /// The prologue will be inserted before the first instruction + /// in this basic block. + MachineBasicBlock *Save; + /// Current safe point found for the epilogue. + /// The epilogue will be inserted before the first terminator instruction + /// in this basic block. + MachineBasicBlock *Restore; + /// Hold the information of the basic block frequency. + /// Use to check the profitability of the new points. + MachineBlockFrequencyInfo *MBFI; + /// Hold the loop information. Used to determine if Save and Restore + /// are in the same loop. + MachineLoopInfo *MLI; + /// Frequency of the Entry block. + uint64_t EntryFreq; + /// Current opcode for frame setup. + unsigned FrameSetupOpcode; + /// Current opcode for frame destroy. + unsigned FrameDestroyOpcode; + /// Entry block. + const MachineBasicBlock *Entry; + typedef SmallSetVector<unsigned, 16> SetOfRegs; + /// Registers that need to be saved for the current function. + mutable SetOfRegs CurrentCSRs; + /// Current MachineFunction. + MachineFunction *MachineFunc; + + /// \brief Check if \p MI uses or defines a callee-saved register or + /// a frame index. If this is the case, this means \p MI must happen + /// after Save and before Restore. 
+ bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS) const; + + const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const { + if (CurrentCSRs.empty()) { + BitVector SavedRegs; + const TargetFrameLowering *TFI = + MachineFunc->getSubtarget().getFrameLowering(); + + TFI->determineCalleeSaves(*MachineFunc, SavedRegs, RS); + + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + CurrentCSRs.insert((unsigned)Reg); + } + return CurrentCSRs; + } + + /// \brief Update the Save and Restore points such that \p MBB is in + /// the region that is dominated by Save and post-dominated by Restore + /// and Save and Restore still match the safe point definition. + /// Such point may not exist and Save and/or Restore may be null after + /// this call. + void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); + + /// \brief Initialize the pass for \p MF. + void init(MachineFunction &MF) { + RCI.runOnMachineFunction(MF); + MDT = &getAnalysis<MachineDominatorTree>(); + MPDT = &getAnalysis<MachinePostDominatorTree>(); + Save = nullptr; + Restore = nullptr; + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + MLI = &getAnalysis<MachineLoopInfo>(); + EntryFreq = MBFI->getEntryFreq(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + FrameSetupOpcode = TII.getCallFrameSetupOpcode(); + FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); + Entry = &MF.front(); + CurrentCSRs.clear(); + MachineFunc = &MF; + + ++NumFunc; + } + + /// Check whether or not Save and Restore points are still interesting for + /// shrink-wrapping. + bool ArePointsInteresting() const { return Save != Entry && Save && Restore; } + + /// \brief Check if shrink wrapping is enabled for this target and function. + static bool isShrinkWrapEnabled(const MachineFunction &MF); + +public: + static char ID; + + ShrinkWrap() : MachineFunctionPass(ID) { + initializeShrinkWrapPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequired<MachineDominatorTree>(); + AU.addRequired<MachinePostDominatorTree>(); + AU.addRequired<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const override { + return "Shrink Wrapping analysis"; + } + + /// \brief Perform the shrink-wrapping analysis and update + /// the MachineFrameInfo attached to \p MF with the results. + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // End anonymous namespace. 
+ +char ShrinkWrap::ID = 0; +char &llvm::ShrinkWrapID = ShrinkWrap::ID; + +INITIALIZE_PASS_BEGIN(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, + false) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(ShrinkWrap, "shrink-wrap", "Shrink Wrap Pass", false, false) + +bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, + RegScavenger *RS) const { + if (MI.getOpcode() == FrameSetupOpcode || + MI.getOpcode() == FrameDestroyOpcode) { + DEBUG(dbgs() << "Frame instruction: " << MI << '\n'); + return true; + } + for (const MachineOperand &MO : MI.operands()) { + bool UseOrDefCSR = false; + if (MO.isReg()) { + unsigned PhysReg = MO.getReg(); + if (!PhysReg) + continue; + assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && + "Unallocated register?!"); + UseOrDefCSR = RCI.getLastCalleeSavedAlias(PhysReg); + } else if (MO.isRegMask()) { + // Check if this regmask clobbers any of the CSRs. + for (unsigned Reg : getCurrentCSRs(RS)) { + if (MO.clobbersPhysReg(Reg)) { + UseOrDefCSR = true; + break; + } + } + } + if (UseOrDefCSR || MO.isFI()) { + DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI(" + << MO.isFI() << "): " << MI << '\n'); + return true; + } + } + return false; +} + +/// \brief Helper function to find the immediate (post) dominator. +template <typename ListOfBBs, typename DominanceAnalysis> +MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, + DominanceAnalysis &Dom) { + MachineBasicBlock *IDom = &Block; + for (MachineBasicBlock *BB : BBs) { + IDom = Dom.findNearestCommonDominator(IDom, BB); + if (!IDom) + break; + } + return IDom; +} + +void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, + RegScavenger *RS) { + // Get rid of the easy cases first. + if (!Save) + Save = &MBB; + else + Save = MDT->findNearestCommonDominator(Save, &MBB); + + if (!Save) { + DEBUG(dbgs() << "Found a block that is not reachable from Entry\n"); + return; + } + + if (!Restore) + Restore = &MBB; + else + Restore = MPDT->findNearestCommonDominator(Restore, &MBB); + + // Make sure we would be able to insert the restore code before the + // terminator. + if (Restore == &MBB) { + for (const MachineInstr &Terminator : MBB.terminators()) { + if (!useOrDefCSROrFI(Terminator, RS)) + continue; + // One of the terminator needs to happen before the restore point. + if (MBB.succ_empty()) { + Restore = nullptr; + break; + } + // Look for a restore point that post-dominates all the successors. + // The immediate post-dominator is what we are looking for. + Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT); + break; + } + } + + if (!Restore) { + DEBUG(dbgs() << "Restore point needs to be spanned on several blocks\n"); + return; + } + + // Make sure Save and Restore are suitable for shrink-wrapping: + // 1. all path from Save needs to lead to Restore before exiting. + // 2. all path to Restore needs to go through Save from Entry. + // We achieve that by making sure that: + // A. Save dominates Restore. + // B. Restore post-dominates Save. + // C. Save and Restore are in the same loop. 
+ bool SaveDominatesRestore = false; + bool RestorePostDominatesSave = false; + while (Save && Restore && + (!(SaveDominatesRestore = MDT->dominates(Save, Restore)) || + !(RestorePostDominatesSave = MPDT->dominates(Restore, Save)) || + // Post-dominance is not enough in loops to ensure that all uses/defs + // are after the prologue and before the epilogue at runtime. + // E.g., + // while(1) { + // Save + // Restore + // if (...) + // break; + // use/def CSRs + // } + // All the uses/defs of CSRs are dominated by Save and post-dominated + // by Restore. However, the CSRs uses are still reachable after + // Restore and before Save are executed. + // + // For now, just push the restore/save points outside of loops. + // FIXME: Refine the criteria to still find interesting cases + // for loops. + MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + // Fix (A). + if (!SaveDominatesRestore) { + Save = MDT->findNearestCommonDominator(Save, Restore); + continue; + } + // Fix (B). + if (!RestorePostDominatesSave) + Restore = MPDT->findNearestCommonDominator(Restore, Save); + + // Fix (C). + if (Save && Restore && + (MLI->getLoopFor(Save) || MLI->getLoopFor(Restore))) { + if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) { + // Push Save outside of this loop if immediate dominator is different + // from save block. If immediate dominator is not different, bail out. + MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT); + if (IDom != Save) + Save = IDom; + else { + Save = nullptr; + break; + } + } else { + // If the loop does not exit, there is no point in looking + // for a post-dominator outside the loop. + SmallVector<MachineBasicBlock*, 4> ExitBlocks; + MLI->getLoopFor(Restore)->getExitingBlocks(ExitBlocks); + // Push Restore outside of this loop. + // Look for the immediate post-dominator of the loop exits. + MachineBasicBlock *IPdom = Restore; + for (MachineBasicBlock *LoopExitBB: ExitBlocks) { + IPdom = FindIDom<>(*IPdom, LoopExitBB->successors(), *MPDT); + if (!IPdom) + break; + } + // If the immediate post-dominator is not in a less nested loop, + // then we are stuck in a program with an infinite loop. + // In that case, we will not find a safe point, hence, bail out. + if (IPdom && MLI->getLoopDepth(IPdom) < MLI->getLoopDepth(Restore)) + Restore = IPdom; + else { + Restore = nullptr; + break; + } + } + } + } +} + +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (MF.empty() || !isShrinkWrapEnabled(MF)) + return false; + + DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr<RegScavenger> RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + + for (MachineBasicBlock &MBB : MF) { + DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() + << '\n'); + + if (MBB.isEHFuncletEntry()) { + DEBUG(dbgs() << "EH Funclets are not supported yet.\n"); + return false; + } + + for (const MachineInstr &MI : MBB) { + if (!useOrDefCSROrFI(MI, RS.get())) + continue; + // Save (resp. restore) point must dominate (resp. post dominate) + // MI. Look for the proper basic block for those. + updateSaveRestorePoints(MBB, RS.get()); + // If we are at a point where we cannot improve the placement of + // save/restore instructions, just give up. 
+      if (!ArePointsInteresting()) {
+        DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+        return false;
+      }
+      // No need to look for other instructions, this basic block
+      // will already be part of the handled region.
+      break;
+    }
+  }
+  if (!ArePointsInteresting()) {
+    // If the points are not interesting at this point, then they must be null
+    // because it means we did not encounter any frame/CSR related code.
+    // Otherwise, we would have returned from the previous loop.
+    assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
+    DEBUG(dbgs() << "Nothing to shrink-wrap\n");
+    return false;
+  }
+
+  DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: " << EntryFreq
+               << '\n');
+
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  do {
+    DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+                 << Save->getNumber() << ' ' << Save->getName() << ' '
+                 << MBFI->getBlockFreq(Save).getFrequency() << "\nRestore: "
+                 << Restore->getNumber() << ' ' << Restore->getName() << ' '
+                 << MBFI->getBlockFreq(Restore).getFrequency() << '\n');
+
+    bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
+    if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save).getFrequency()) &&
+         EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+        ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
+         TFI->canUseAsEpilogue(*Restore)))
+      break;
+    DEBUG(dbgs() << "New points are too expensive or invalid for the target\n");
+    MachineBasicBlock *NewBB;
+    if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
+      Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+      if (!Save)
+        break;
+      NewBB = Save;
+    } else {
+      // Restore is expensive.
+      Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+      if (!Restore)
+        break;
+      NewBB = Restore;
+    }
+    updateSaveRestorePoints(*NewBB, RS.get());
+  } while (Save && Restore);
+
+  if (!ArePointsInteresting()) {
+    ++NumCandidatesDropped;
+    return false;
+  }
+
+  DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber()
+               << ' ' << Save->getName() << "\nRestore: "
+               << Restore->getNumber() << ' ' << Restore->getName() << '\n');
+
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+  MFI->setSavePoint(Save);
+  MFI->setRestorePoint(Restore);
+  ++NumCandidates;
+  return false;
+}
+
+bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+
+  switch (EnableShrinkWrapOpt) {
+  case cl::BOU_UNSET:
+    return TFI->enableShrinkWrapping(MF) &&
+           // Windows with CFI has some limitations that make it impossible
+           // to use shrink-wrapping.
+           !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
+           // Sanitizers look at the value of the stack at the location
+           // of the crash. Since a crash can happen anywhere, the
+           // frame must be lowered before anything else happens for the
+           // sanitizers to be able to get a correct stack frame.
+           !(MF.getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
+             MF.getFunction()->hasFnAttribute(Attribute::SanitizeThread) ||
+             MF.getFunction()->hasFnAttribute(Attribute::SanitizeMemory));
+  // If EnableShrinkWrap is set, it takes precedence over whatever the
+  // target sets. The rationale is that we assume we want to test
+  // something related to shrink-wrapping.
+ case cl::BOU_TRUE: + return true; + case cl::BOU_FALSE: + return false; + } + llvm_unreachable("Invalid shrink-wrapping state"); +} diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp new file mode 100644 index 0000000..e1f242a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -0,0 +1,495 @@ +//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This transformation is designed for use by code generators which use SjLj +// based exception handling. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include <set> +using namespace llvm; + +#define DEBUG_TYPE "sjljehprepare" + +STATISTIC(NumInvokes, "Number of invokes replaced"); +STATISTIC(NumSpilled, "Number of registers live across unwind edges"); + +namespace { +class SjLjEHPrepare : public FunctionPass { + Type *doubleUnderDataTy; + Type *doubleUnderJBufTy; + Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *BuiltinSetupDispatchFn; + Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *CallSiteFn; + Constant *FuncCtxFn; + AllocaInst *FuncCtx; + +public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPrepare() : FunctionPass(ID) {} + bool doInitialization(Module &M) override; + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override {} + const char *getPassName() const override { + return "SJLJ Exception Handling preparation"; + } + +private: + bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); + Value *setupFunctionContext(Function &F, ArrayRef<LandingPadInst *> LPads); + void lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef<InvokeInst *> Invokes); + void insertCallSiteStore(Instruction *I, int Number); +}; +} // end anonymous namespace + +char SjLjEHPrepare::ID = 0; +INITIALIZE_PASS(SjLjEHPrepare, "sjljehprepare", "Prepare SjLj exceptions", + false, false) + +// Public Interface To the SjLjEHPrepare pass. +FunctionPass *llvm::createSjLjEHPreparePass() { return new SjLjEHPrepare(); } +// doInitialization - Set up decalarations and types needed to process +// exceptions. 
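+//
+// The function context built below corresponds roughly to (illustrative):
+//   struct SjLjFunctionContext {
+//     void    *__prev;        // previous context in the chain
+//     int32_t  call_site;     // index of the active call site
+//     int32_t  __data[4];     // exception value and selector
+//     void    *__personality; // personality routine
+//     void    *__lsda;        // language-specific data area
+//     void    *__jbuf[5];     // five-word buffer for builtin_setjmp
+//   };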
+bool SjLjEHPrepare::doInitialization(Module &M) { + // Build the function context structure. + // builtin_setjmp uses a five word jbuf + Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); + Type *Int32Ty = Type::getInt32Ty(M.getContext()); + doubleUnderDataTy = ArrayType::get(Int32Ty, 4); + doubleUnderJBufTy = ArrayType::get(VoidPtrTy, 5); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + doubleUnderDataTy, // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + doubleUnderJBufTy, // __jbuf + nullptr); + RegisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); + UnregisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)nullptr); + FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); + BuiltinSetupDispatchFn = + Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setup_dispatch); + LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); + CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); + FuncCtxFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_functioncontext); + PersonalityFn = nullptr; + + return true; +} + +/// insertCallSiteStore - Insert a store of the call-site value to the +/// function context +void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { + IRBuilder<> Builder(I); + + // Get a reference to the call_site field. + Type *Int32Ty = Type::getInt32Ty(I->getContext()); + Value *Zero = ConstantInt::get(Int32Ty, 0); + Value *One = ConstantInt::get(Int32Ty, 1); + Value *Idxs[2] = { Zero, One }; + Value *CallSite = + Builder.CreateGEP(FunctionContextTy, FuncCtx, Idxs, "call_site"); + + // Insert a store of the call-site number + ConstantInt *CallSiteNoC = + ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); + Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); +} + +/// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until +/// we reach blocks we've already seen. +static void MarkBlocksLiveIn(BasicBlock *BB, + SmallPtrSetImpl<BasicBlock *> &LiveBBs) { + if (!LiveBBs.insert(BB).second) + return; // already been here. + + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) + MarkBlocksLiveIn(*PI, LiveBBs); +} + +/// substituteLPadValues - Substitute the values returned by the landingpad +/// instruction with those returned by the personality function. +void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, + Value *SelVal) { + SmallVector<Value *, 8> UseWorkList(LPI->user_begin(), LPI->user_end()); + while (!UseWorkList.empty()) { + Value *Val = UseWorkList.pop_back_val(); + ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Val); + if (!EVI) + continue; + if (EVI->getNumIndices() != 1) + continue; + if (*EVI->idx_begin() == 0) + EVI->replaceAllUsesWith(ExnVal); + else if (*EVI->idx_begin() == 1) + EVI->replaceAllUsesWith(SelVal); + if (EVI->getNumUses() == 0) + EVI->eraseFromParent(); + } + + if (LPI->getNumUses() == 0) + return; + + // There are still some uses of LPI. Construct an aggregate with the exception + // values and replace the LPI with that aggregate. 
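+  // The aggregate is built with two insertvalues placed right after the
+  // selector load, so the rebuilt value dominates every remaining use of LPI.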
+ Type *LPadType = LPI->getType(); + Value *LPadVal = UndefValue::get(LPadType); + auto *SelI = cast<Instruction>(SelVal); + IRBuilder<> Builder(SelI->getParent(), std::next(SelI->getIterator())); + LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); + LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); + + LPI->replaceAllUsesWith(LPadVal); +} + +/// setupFunctionContext - Allocate the function context on the stack and fill +/// it with all of the data that we know at this point. +Value *SjLjEHPrepare::setupFunctionContext(Function &F, + ArrayRef<LandingPadInst *> LPads) { + BasicBlock *EntryBB = &F.front(); + + // Create an alloca for the incoming jump buffer ptr and the new jump buffer + // that needs to be restored on all exits from the function. This is an alloca + // because the value needs to be added to the global context list. + auto &DL = F.getParent()->getDataLayout(); + unsigned Align = DL.getPrefTypeAlignment(FunctionContextTy); + FuncCtx = new AllocaInst(FunctionContextTy, nullptr, Align, "fn_context", + &EntryBB->front()); + + // Fill in the function context structure. + for (unsigned I = 0, E = LPads.size(); I != E; ++I) { + LandingPadInst *LPI = LPads[I]; + IRBuilder<> Builder(LPI->getParent(), + LPI->getParent()->getFirstInsertionPt()); + + // Reference the __data field. + Value *FCData = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data"); + + // The exception values come back in context->__data[0]. + Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 0, "exception_gep"); + Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); + ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); + + Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData, + 0, 1, "exn_selector_gep"); + Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); + + substituteLPadValues(LPI, ExnVal, SelVal); + } + + // Personality function + IRBuilder<> Builder(EntryBB->getTerminator()); + if (!PersonalityFn) + PersonalityFn = F.getPersonalityFn(); + Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32( + FunctionContextTy, FuncCtx, 0, 3, "pers_fn_gep"); + Builder.CreateStore( + Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); + + // LSDA address + Value *LSDA = Builder.CreateCall(LSDAAddrFn, {}, "lsda_addr"); + Value *LSDAFieldPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 4, "lsda_gep"); + Builder.CreateStore(LSDA, LSDAFieldPtr, /*isVolatile=*/true); + + return FuncCtx; +} + +/// lowerIncomingArguments - To avoid having to handle incoming arguments +/// specially, we lower each arg to a copy instruction in the entry block. This +/// ensures that the argument value itself cannot be live out of the entry +/// block. +void SjLjEHPrepare::lowerIncomingArguments(Function &F) { + BasicBlock::iterator AfterAllocaInsPt = F.begin()->begin(); + while (isa<AllocaInst>(AfterAllocaInsPt) && + isa<ConstantInt>(cast<AllocaInst>(AfterAllocaInsPt)->getArraySize())) + ++AfterAllocaInsPt; + assert(AfterAllocaInsPt != F.front().end()); + + for (auto &AI : F.args()) { + Type *Ty = AI.getType(); + + // Use 'select i8 true, %arg, undef' to simulate a 'no-op' instruction. 
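+    // (A select works uniformly for arguments of any type, unlike a bitcast,
+    // which cannot handle aggregates. Note that the RAUW below also rewrites
+    // the select's own use of the argument, so operand 1 is reset afterwards.)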
+ Value *TrueValue = ConstantInt::getTrue(F.getContext()); + Value *UndefValue = UndefValue::get(Ty); + Instruction *SI = SelectInst::Create( + TrueValue, &AI, UndefValue, AI.getName() + ".tmp", &*AfterAllocaInsPt); + AI.replaceAllUsesWith(SI); + + // Reset the operand, because it was clobbered by the RAUW above. + SI->setOperand(1, &AI); + } +} + +/// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind +/// edge and spill them. +void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, + ArrayRef<InvokeInst *> Invokes) { + // Finally, scan the code looking for instructions with bad live ranges. + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; + ++II) { + // Ignore obvious cases we don't have to handle. In particular, most + // instructions either have no uses or only have a single use inside the + // current block. Ignore them quickly. + Instruction *Inst = &*II; + if (Inst->use_empty()) + continue; + if (Inst->hasOneUse() && + cast<Instruction>(Inst->user_back())->getParent() == BB && + !isa<PHINode>(Inst->user_back())) + continue; + + // If this is an alloca in the entry block, it's not a real register + // value. + if (AllocaInst *AI = dyn_cast<AllocaInst>(Inst)) + if (isa<ConstantInt>(AI->getArraySize()) && BB == F.begin()) + continue; + + // Avoid iterator invalidation by copying users to a temporary vector. + SmallVector<Instruction *, 16> Users; + for (User *U : Inst->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI->getParent() != BB || isa<PHINode>(UI)) + Users.push_back(UI); + } + + // Find all of the blocks that this value is live in. + SmallPtrSet<BasicBlock *, 64> LiveBBs; + LiveBBs.insert(Inst->getParent()); + while (!Users.empty()) { + Instruction *U = Users.back(); + Users.pop_back(); + + if (!isa<PHINode>(U)) { + MarkBlocksLiveIn(U->getParent(), LiveBBs); + } else { + // Uses for a PHI node occur in their predecessor block. + PHINode *PN = cast<PHINode>(U); + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == Inst) + MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); + } + } + + // Now that we know all of the blocks that this thing is live in, see if + // it includes any of the unwind locations. + bool NeedsSpill = false; + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { + DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " + << UnwindBlock->getName() << "\n"); + NeedsSpill = true; + break; + } + } + + // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. + if (NeedsSpill) { + DemoteRegToStack(*Inst, true); + ++NumSpilled; + } + } + } + + // Go through the landing pads and remove any PHIs there. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); + + // Place PHIs into a set to avoid invalidating the iterator. + SmallPtrSet<PHINode *, 8> PHIsToDemote; + for (BasicBlock::iterator PN = UnwindBlock->begin(); isa<PHINode>(PN); ++PN) + PHIsToDemote.insert(cast<PHINode>(PN)); + if (PHIsToDemote.empty()) + continue; + + // Demote the PHIs to the stack. 
+ for (PHINode *PN : PHIsToDemote) + DemotePHIToStack(PN); + + // Move the landingpad instruction back to the top of the landing pad block. + LPI->moveBefore(&UnwindBlock->front()); + } +} + +/// setupEntryBlockAndCallSites - Setup the entry block by creating and filling +/// the function context and marking the call sites with the appropriate +/// values. These values are used by the DWARF EH emitter. +bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { + SmallVector<ReturnInst *, 16> Returns; + SmallVector<InvokeInst *, 16> Invokes; + SmallSetVector<LandingPadInst *, 16> LPads; + + // Look through the terminators of the basic blocks to find invokes. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) { + if (Function *Callee = II->getCalledFunction()) + if (Callee->isIntrinsic() && + Callee->getIntrinsicID() == Intrinsic::donothing) { + // Remove the NOP invoke. + BranchInst::Create(II->getNormalDest(), II); + II->eraseFromParent(); + continue; + } + + Invokes.push_back(II); + LPads.insert(II->getUnwindDest()->getLandingPadInst()); + } else if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator())) { + Returns.push_back(RI); + } + + if (Invokes.empty()) + return false; + + NumInvokes += Invokes.size(); + + lowerIncomingArguments(F); + lowerAcrossUnwindEdges(F, Invokes); + + Value *FuncCtx = + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + BasicBlock *EntryBB = &F.front(); + IRBuilder<> Builder(EntryBB->getTerminator()); + + // Get a reference to the jump buffer. + Value *JBufPtr = + Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 5, "jbuf_gep"); + + // Save the frame pointer. + Value *FramePtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 0, + "jbuf_fp_gep"); + + Value *Val = Builder.CreateCall(FrameAddrFn, Builder.getInt32(0), "fp"); + Builder.CreateStore(Val, FramePtr, /*isVolatile=*/true); + + // Save the stack pointer. + Value *StackPtr = Builder.CreateConstGEP2_32(doubleUnderJBufTy, JBufPtr, 0, 2, + "jbuf_sp_gep"); + + Val = Builder.CreateCall(StackAddrFn, {}, "sp"); + Builder.CreateStore(Val, StackPtr, /*isVolatile=*/true); + + // Call the setup_dispatch instrinsic. It fills in the rest of the jmpbuf. + Builder.CreateCall(BuiltinSetupDispatchFn, {}); + + // Store a pointer to the function context so that the back-end will know + // where to look for it. + Value *FuncCtxArg = Builder.CreateBitCast(FuncCtx, Builder.getInt8PtrTy()); + Builder.CreateCall(FuncCtxFn, FuncCtxArg); + + // At this point, we are all set up, update the invoke instructions to mark + // their call_site values. + for (unsigned I = 0, E = Invokes.size(); I != E; ++I) { + insertCallSiteStore(Invokes[I], I + 1); + + ConstantInt *CallSiteNum = + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + + // Record the call site value for the back end so it stays associated with + // the invoke. + CallInst::Create(CallSiteFn, CallSiteNum, "", Invokes[I]); + } + + // Mark call instructions that aren't nounwind as no-action (call_site == + // -1). Skip the entry block, as prior to then, no function context has been + // created for this function and any unexpected exceptions thrown will go + // directly to the caller's context, which is what we want anyway, so no need + // to do anything here. 
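+  // (The pre-increment in the loop condition below is what skips the entry
+  // block.)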
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) + for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (!CI->doesNotThrow()) + insertCallSiteStore(CI, -1); + } else if (ResumeInst *RI = dyn_cast<ResumeInst>(I)) { + insertCallSiteStore(RI, -1); + } + + // Register the function context and make sure it's known to not throw + CallInst *Register = + CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); + Register->setDoesNotThrow(); + + // Following any allocas not in the entry block, update the saved SP in the + // jmpbuf to the new value. + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + if (BB == F.begin()) + continue; + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(I)) { + if (CI->getCalledFunction() != StackRestoreFn) + continue; + } else if (!isa<AllocaInst>(I)) { + continue; + } + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(&*I); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + } + + // Finally, for any returns from this function, if this function contains an + // invoke, add a call to unregister the function context. + for (unsigned I = 0, E = Returns.size(); I != E; ++I) + CallInst::Create(UnregisterFn, FuncCtx, "", Returns[I]); + + return true; +} + +bool SjLjEHPrepare::runOnFunction(Function &F) { + bool Res = setupEntryBlockAndCallSites(F); + return Res; +} diff --git a/contrib/llvm/lib/CodeGen/SlotIndexes.cpp b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp new file mode 100644 index 0000000..c9d23f6 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SlotIndexes.cpp @@ -0,0 +1,250 @@ +//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "slotindexes" + +char SlotIndexes::ID = 0; +INITIALIZE_PASS(SlotIndexes, "slotindexes", + "Slot index numbering", false, false) + +STATISTIC(NumLocalRenum, "Number of local renumberings"); +STATISTIC(NumGlobalRenum, "Number of global renumberings"); + +void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { + au.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(au); +} + +void SlotIndexes::releaseMemory() { + mi2iMap.clear(); + MBBRanges.clear(); + idx2MBBMap.clear(); + indexList.clear(); + ileAllocator.Reset(); +} + +bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { + + // Compute numbering as follows: + // Grab an iterator to the start of the index list. + // Iterate over all MBBs, and within each MBB all MIs, keeping the MI + // iterator in lock-step (though skipping it over indexes which have + // null pointers in the instruction field). + // At each iteration assert that the instruction pointed to in the index + // is the same one pointed to by the MI iterator. This + + // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. 
should + // only need to be set up once after the first numbering is computed. + + mf = &fn; + + // Check that the list contains only the sentinal. + assert(indexList.empty() && "Index list non-empty at initial numbering?"); + assert(idx2MBBMap.empty() && + "Index -> MBB mapping non-empty at initial numbering?"); + assert(MBBRanges.empty() && + "MBB -> Index mapping non-empty at initial numbering?"); + assert(mi2iMap.empty() && + "MachineInstr -> Index mapping non-empty at initial numbering?"); + + unsigned index = 0; + MBBRanges.resize(mf->getNumBlockIDs()); + idx2MBBMap.reserve(mf->size()); + + indexList.push_back(createEntry(nullptr, index)); + + // Iterate over the function. + for (MachineFunction::iterator mbbItr = mf->begin(), mbbEnd = mf->end(); + mbbItr != mbbEnd; ++mbbItr) { + MachineBasicBlock *mbb = &*mbbItr; + + // Insert an index for the MBB start. + SlotIndex blockStartIndex(&indexList.back(), SlotIndex::Slot_Block); + + for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); + miItr != miEnd; ++miItr) { + MachineInstr *mi = miItr; + if (mi->isDebugValue()) + continue; + + // Insert a store index for the instr. + indexList.push_back(createEntry(mi, index += SlotIndex::InstrDist)); + + // Save this base index in the maps. + mi2iMap.insert(std::make_pair(mi, SlotIndex(&indexList.back(), + SlotIndex::Slot_Block))); + } + + // We insert one blank instructions between basic blocks. + indexList.push_back(createEntry(nullptr, index += SlotIndex::InstrDist)); + + MBBRanges[mbb->getNumber()].first = blockStartIndex; + MBBRanges[mbb->getNumber()].second = SlotIndex(&indexList.back(), + SlotIndex::Slot_Block); + idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); + } + + // Sort the Idx2MBBMap + std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); + + DEBUG(mf->print(dbgs(), this)); + + // And we're done! + return false; +} + +void SlotIndexes::renumberIndexes() { + // Renumber updates the index of every element of the index list. + DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); + ++NumGlobalRenum; + + unsigned index = 0; + + for (IndexList::iterator I = indexList.begin(), E = indexList.end(); + I != E; ++I) { + I->setIndex(index); + index += SlotIndex::InstrDist; + } +} + +// Renumber indexes locally after curItr was inserted, but failed to get a new +// index. +void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { + // Number indexes with half the default spacing so we can catch up quickly. + const unsigned Space = SlotIndex::InstrDist/2; + static_assert((Space & 3) == 0, "InstrDist must be a multiple of 2*NUM"); + + IndexList::iterator startItr = std::prev(curItr); + unsigned index = startItr->getIndex(); + do { + curItr->setIndex(index += Space); + ++curItr; + // If the next index is bigger, we have caught up. + } while (curItr != indexList.end() && curItr->getIndex() <= index); + + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << startItr->getIndex() << '-' + << index << " ***\n"); + ++NumLocalRenum; +} + +// Repair indexes after adding and removing instructions. +void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End) { + // FIXME: Is this really necessary? The only caller repairIntervalsForRange() + // does the same thing. + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. 
+ while (Begin != MBB->begin() && !hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !hasIndex(End)) + ++End; + + bool includeStart = (Begin == MBB->begin()); + SlotIndex startIdx; + if (includeStart) + startIdx = getMBBStartIdx(MBB); + else + startIdx = getInstructionIndex(Begin); + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB); + else + endIdx = getInstructionIndex(End); + + // FIXME: Conceptually, this code is implementing an iterator on MBB that + // optionally includes an additional position prior to MBB->begin(), indicated + // by the includeStart flag. This is done so that we can iterate MIs in a MBB + // in parallel with SlotIndexes, but there should be a better way to do this. + IndexList::iterator ListB = startIdx.listEntry()->getIterator(); + IndexList::iterator ListI = endIdx.listEntry()->getIterator(); + MachineBasicBlock::iterator MBBI = End; + bool pastStart = false; + while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { + assert(ListI->getIndex() >= startIdx.getIndex() && + (includeStart || !pastStart) && + "Decremented past the beginning of region to repair."); + + MachineInstr *SlotMI = ListI->getInstr(); + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : nullptr; + bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); + + if (SlotMI == MI && !MBBIAtBegin) { + --ListI; + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) { + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else { + --ListI; + if (SlotMI) + removeMachineInstrFromMaps(SlotMI); + } + } + + // In theory this could be combined with the previous loop, but it is tricky + // to update the IndexList while we are iterating it. + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) + insertMachineInstrInMaps(MI); + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SlotIndexes::dump() const { + for (IndexList::const_iterator itr = indexList.begin(); + itr != indexList.end(); ++itr) { + dbgs() << itr->getIndex() << " "; + + if (itr->getInstr()) { + dbgs() << *itr->getInstr(); + } else { + dbgs() << "\n"; + } + } + + for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) + dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; +} +#endif + +// Print a SlotIndex to a raw_ostream. +void SlotIndex::print(raw_ostream &os) const { + if (isValid()) + os << listEntry()->getIndex() << "Berd"[getSlot()]; + else + os << "invalid"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +// Dump a SlotIndex to stderr. +void SlotIndex::dump() const { + print(dbgs()); + dbgs() << "\n"; +} +#endif + diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp new file mode 100644 index 0000000..d30cfc2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -0,0 +1,390 @@ +//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the spill code placement analysis. +// +// Each edge bundle corresponds to a node in a Hopfield network. 
Constraints on +// basic blocks are weighted by the block frequency and added to become the node +// bias. +// +// Transparent basic blocks have the variable live through, but don't care if it +// is spilled or in a register. These blocks become connections in the Hopfield +// network, again weighted by block frequency. +// +// The Hopfield network minimizes (possibly locally) its energy function: +// +// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b ) +// +// The energy function represents the expected spill code execution frequency, +// or the cost of spilling. This is a Lyapunov function which never increases +// when a node is updated. It is guaranteed to converge to a local minimum. +// +//===----------------------------------------------------------------------===// + +#include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/EdgeBundles.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +#define DEBUG_TYPE "spillplacement" + +char SpillPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(EdgeBundles) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement", + "Spill Code Placement Analysis", true, true) + +char &llvm::SpillPlacementID = SpillPlacement::ID; + +void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addRequiredTransitive<EdgeBundles>(); + AU.addRequiredTransitive<MachineLoopInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Node - Each edge bundle corresponds to a Hopfield node. +/// +/// The node contains precomputed frequency data that only depends on the CFG, +/// but Bias and Links are computed each time placeSpills is called. +/// +/// The node Value is positive when the variable should be in a register. The +/// value can change when linked nodes change, but convergence is very fast +/// because all weights are positive. +/// +struct SpillPlacement::Node { + /// BiasN - Sum of blocks that prefer a spill. + BlockFrequency BiasN; + /// BiasP - Sum of blocks that prefer a register. + BlockFrequency BiasP; + + /// Value - Output value of this node computed from the Bias and links. + /// This is always on of the values {-1, 0, 1}. A positive number means the + /// variable should go in a register through this bundle. + int Value; + + typedef SmallVector<std::pair<BlockFrequency, unsigned>, 4> LinkVector; + + /// Links - (Weight, BundleNo) for all transparent blocks connecting to other + /// bundles. The weights are all positive block frequencies. + LinkVector Links; + + /// SumLinkWeights - Cached sum of the weights of all links + ThresHold. + BlockFrequency SumLinkWeights; + + /// preferReg - Return true when this node prefers to be in a register. + bool preferReg() const { + // Undecided nodes (Value==0) go on the stack. + return Value > 0; + } + + /// mustSpill - Return True if this node is so biased that it must spill. + bool mustSpill() const { + // We must spill if Bias < -sum(weights) or the MustSpill flag was set. 
+ // BiasN is saturated when MustSpill is set, make sure this still returns + // true when the RHS saturates. Note that SumLinkWeights includes Threshold. + return BiasN >= BiasP + SumLinkWeights; + } + + /// clear - Reset per-query data, but preserve frequencies that only depend on + // the CFG. + void clear(const BlockFrequency &Threshold) { + BiasN = BiasP = Value = 0; + SumLinkWeights = Threshold; + Links.clear(); + } + + /// addLink - Add a link to bundle b with weight w. + void addLink(unsigned b, BlockFrequency w) { + // Update cached sum. + SumLinkWeights += w; + + // There can be multiple links to the same bundle, add them up. + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) + if (I->second == b) { + I->first += w; + return; + } + // This must be the first link to b. + Links.push_back(std::make_pair(w, b)); + } + + /// addBias - Bias this node. + void addBias(BlockFrequency freq, BorderConstraint direction) { + switch (direction) { + default: + break; + case PrefReg: + BiasP += freq; + break; + case PrefSpill: + BiasN += freq; + break; + case MustSpill: + BiasN = BlockFrequency::getMaxFrequency(); + break; + } + } + + /// update - Recompute Value from Bias and Links. Return true when node + /// preference changes. + bool update(const Node nodes[], const BlockFrequency &Threshold) { + // Compute the weighted sum of inputs. + BlockFrequency SumN = BiasN; + BlockFrequency SumP = BiasP; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { + if (nodes[I->second].Value == -1) + SumN += I->first; + else if (nodes[I->second].Value == 1) + SumP += I->first; + } + + // Each weighted sum is going to be less than the total frequency of the + // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we + // will add a dead zone around 0 for two reasons: + // + // 1. It avoids arbitrary bias when all links are 0 as is possible during + // initial iterations. + // 2. It helps tame rounding errors when the links nominally sum to 0. + // + bool Before = preferReg(); + if (SumN >= SumP + Threshold) + Value = -1; + else if (SumP >= SumN + Threshold) + Value = 1; + else + Value = 0; + return Before != preferReg(); + } +}; + +bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { + MF = &mf; + bundles = &getAnalysis<EdgeBundles>(); + loops = &getAnalysis<MachineLoopInfo>(); + + assert(!nodes && "Leaking node array"); + nodes = new Node[bundles->getNumBundles()]; + + // Compute total ingoing and outgoing block frequencies for all bundles. + BlockFrequencies.resize(mf.getNumBlockIDs()); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + setThreshold(MBFI->getEntryFreq()); + for (auto &I : mf) { + unsigned Num = I.getNumber(); + BlockFrequencies[Num] = MBFI->getBlockFreq(&I); + } + + // We never change the function. + return false; +} + +void SpillPlacement::releaseMemory() { + delete[] nodes; + nodes = nullptr; +} + +/// activate - mark node n as active if it wasn't already. +void SpillPlacement::activate(unsigned n) { + if (ActiveNodes->test(n)) + return; + ActiveNodes->set(n); + nodes[n].clear(Threshold); + + // Very large bundles usually come from big switches, indirect branches, + // landing pads, or loops with many 'continue' statements. It is difficult to + // allocate registers when so many different blocks are involved. 
+ // + // Give a small negative bias to large bundles such that a substantial + // fraction of the connected blocks need to be interested before we consider + // expanding the region through the bundle. This helps compile time by + // limiting the number of blocks visited and the number of links in the + // Hopfield network. + if (bundles->getBlocks(n).size() > 100) { + nodes[n].BiasP = 0; + nodes[n].BiasN = (MBFI->getEntryFreq() / 16); + } +} + +/// \brief Set the threshold for a given entry frequency. +/// +/// Set the threshold relative to \c Entry. Since the threshold is used as a +/// bound on the open interval (-Threshold;Threshold), 1 is the minimum +/// threshold. +void SpillPlacement::setThreshold(const BlockFrequency &Entry) { + // Apparently 2 is a good threshold when Entry==2^14, but we need to scale + // it. Divide by 2^13, rounding as appropriate. + uint64_t Freq = Entry.getFrequency(); + uint64_t Scaled = (Freq >> 13) + bool(Freq & (1 << 12)); + Threshold = std::max(UINT64_C(1), Scaled); +} + +/// addConstraints - Compute node biases and weights from a set of constraints. +/// Set a bit in NodeMask for each active node. +void SpillPlacement::addConstraints(ArrayRef<BlockConstraint> LiveBlocks) { + for (ArrayRef<BlockConstraint>::iterator I = LiveBlocks.begin(), + E = LiveBlocks.end(); I != E; ++I) { + BlockFrequency Freq = BlockFrequencies[I->Number]; + + // Live-in to block? + if (I->Entry != DontCare) { + unsigned ib = bundles->getBundle(I->Number, 0); + activate(ib); + nodes[ib].addBias(Freq, I->Entry); + } + + // Live-out from block? + if (I->Exit != DontCare) { + unsigned ob = bundles->getBundle(I->Number, 1); + activate(ob); + nodes[ob].addBias(Freq, I->Exit); + } + } +} + +/// addPrefSpill - Same as addConstraints(PrefSpill) +void SpillPlacement::addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong) { + for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + BlockFrequency Freq = BlockFrequencies[*I]; + if (Strong) + Freq += Freq; + unsigned ib = bundles->getBundle(*I, 0); + unsigned ob = bundles->getBundle(*I, 1); + activate(ib); + activate(ob); + nodes[ib].addBias(Freq, PrefSpill); + nodes[ob].addBias(Freq, PrefSpill); + } +} + +void SpillPlacement::addLinks(ArrayRef<unsigned> Links) { + for (ArrayRef<unsigned>::iterator I = Links.begin(), E = Links.end(); I != E; + ++I) { + unsigned Number = *I; + unsigned ib = bundles->getBundle(Number, 0); + unsigned ob = bundles->getBundle(Number, 1); + + // Ignore self-loops. + if (ib == ob) + continue; + activate(ib); + activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); + BlockFrequency Freq = BlockFrequencies[Number]; + nodes[ib].addLink(ob, Freq); + nodes[ob].addLink(ib, Freq); + } +} + +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes, Threshold); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + +/// iterate - Repeatedly update the Hopfield nodes until stability or the +/// maximum number of iterations is reached. 
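+/// Updates never increase the network's energy (the Lyapunov argument in the
+/// file header), so the scan converges; it is nonetheless capped at 10
+/// backward/forward sweeps over the linked nodes.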
+/// @param Linked - Numbers of linked nodes that need updating. +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. + while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes, Threshold); + + if (Linked.empty()) + return; + + // Run up to 10 iterations. The edge bundle numbering is closely related to + // basic block numbering, so there is a strong tendency towards chains of + // linked nodes with sequential numbers. By scanning the linked nodes + // backwards and forwards, we make it very likely that a single node can + // affect the entire network in a single iteration. That means very fast + // convergence, usually in a single iteration. + for (unsigned iteration = 0; iteration != 10; ++iteration) { + // Scan backwards, skipping the last node when iteration is not zero. When + // iteration is not zero, the last node was just updated. + bool Changed = false; + for (SmallVectorImpl<unsigned>::const_reverse_iterator I = + iteration == 0 ? Linked.rbegin() : std::next(Linked.rbegin()), + E = Linked.rend(); I != E; ++I) { + unsigned n = *I; + if (nodes[n].update(nodes, Threshold)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + } + if (!Changed || !RecentPositive.empty()) + return; + + // Scan forwards, skipping the first node which was just updated. + Changed = false; + for (SmallVectorImpl<unsigned>::const_iterator I = + std::next(Linked.begin()), E = Linked.end(); I != E; ++I) { + unsigned n = *I; + if (nodes[n].update(nodes, Threshold)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + } + if (!Changed || !RecentPositive.empty()) + return; + } +} + +void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); + // Reuse RegBundles as our ActiveNodes vector. + ActiveNodes = &RegBundles; + ActiveNodes->clear(); + ActiveNodes->resize(bundles->getNumBundles()); +} + +bool +SpillPlacement::finish() { + assert(ActiveNodes && "Call prepare() first"); + + // Write preferences back to ActiveNodes. + bool Perfect = true; + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) + if (!nodes[n].preferReg()) { + ActiveNodes->reset(n); + Perfect = false; + } + ActiveNodes = nullptr; + return Perfect; +} diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h new file mode 100644 index 0000000..03dd58d --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -0,0 +1,164 @@ +//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This analysis computes the optimal spill code placement between basic blocks. +// +// The runOnMachineFunction() method only precomputes some profiling information +// about the CFG. The real work is done by prepare(), addConstraints(), and +// finish() which are called by the register allocator. +// +// Given a variable that is live across multiple basic blocks, and given +// constraints on the basic blocks where the variable is live, determine which +// edge bundles should have the variable in a register and which edge bundles +// should have the variable in a stack slot. 
+// +// The returned bit vector can be used to place optimal spill code at basic +// block entries and exits. Spill code placement inside a basic block is not +// considered. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SPILLPLACEMENT_H +#define LLVM_LIB_CODEGEN_SPILLPLACEMENT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/BlockFrequency.h" + +namespace llvm { + +class BitVector; +class EdgeBundles; +class MachineBasicBlock; +class MachineLoopInfo; +class MachineBlockFrequencyInfo; + +class SpillPlacement : public MachineFunctionPass { + struct Node; + const MachineFunction *MF; + const EdgeBundles *bundles; + const MachineLoopInfo *loops; + const MachineBlockFrequencyInfo *MBFI; + Node *nodes; + + // Nodes that are active in the current computation. Owned by the prepare() + // caller. + BitVector *ActiveNodes; + + // Nodes with active links. Populated by scanActiveBundles. + SmallVector<unsigned, 8> Linked; + + // Nodes that went positive during the last call to scanActiveBundles or + // iterate. + SmallVector<unsigned, 8> RecentPositive; + + // Block frequencies are computed once. Indexed by block number. + SmallVector<BlockFrequency, 8> BlockFrequencies; + + /// Decision threshold. A node gets the output value 0 if the weighted sum of + /// its inputs falls in the open interval (-Threshold;Threshold). + BlockFrequency Threshold; + +public: + static char ID; // Pass identification, replacement for typeid. + + SpillPlacement() : MachineFunctionPass(ID), nodes(nullptr) {} + ~SpillPlacement() override { releaseMemory(); } + + /// BorderConstraint - A basic block has separate constraints for entry and + /// exit. + enum BorderConstraint { + DontCare, ///< Block doesn't care / variable not live. + PrefReg, ///< Block entry/exit prefers a register. + PrefSpill, ///< Block entry/exit prefers a stack slot. + PrefBoth, ///< Block entry prefers both register and stack. + MustSpill ///< A register is impossible, variable must be spilled. + }; + + /// BlockConstraint - Entry and exit constraints for a basic block. + struct BlockConstraint { + unsigned Number; ///< Basic block number (from MBB::getNumber()). + BorderConstraint Entry : 8; ///< Constraint on block entry. + BorderConstraint Exit : 8; ///< Constraint on block exit. + + /// True when this block changes the value of the live range. This means + /// the block has a non-PHI def. When this is false, a live-in value on + /// the stack can be live-out on the stack without inserting a spill. + bool ChangesValue; + }; + + /// prepare - Reset state and prepare for a new spill placement computation. + /// @param RegBundles Bit vector to receive the edge bundles where the + /// variable should be kept in a register. Each bit + /// corresponds to an edge bundle, a set bit means the + /// variable should be kept in a register through the + /// bundle. A clear bit means the variable should be + /// spilled. This vector is retained. + void prepare(BitVector &RegBundles); + + /// addConstraints - Add constraints and biases. This method may be called + /// more than once to accumulate constraints. + /// @param LiveBlocks Constraints for blocks that have the variable live in or + /// live out. + void addConstraints(ArrayRef<BlockConstraint> LiveBlocks); + + /// addPrefSpill - Add PrefSpill constraints to all blocks listed. 
This is + /// equivalent to calling addConstraint with identical BlockConstraints with + /// Entry = Exit = PrefSpill, and ChangesValue = false. + /// + /// @param Blocks Array of block numbers that prefer to spill in and out. + /// @param Strong When true, double the negative bias for these blocks. + void addPrefSpill(ArrayRef<unsigned> Blocks, bool Strong); + + /// addLinks - Add transparent blocks with the given numbers. + void addLinks(ArrayRef<unsigned> Links); + + /// scanActiveBundles - Perform an initial scan of all bundles activated by + /// addConstraints and addLinks, updating their state. Add all the bundles + /// that now prefer a register to RecentPositive. + /// Prepare internal data structures for iterate. + /// Return true is there are any positive nodes. + bool scanActiveBundles(); + + /// iterate - Update the network iteratively until convergence, or new bundles + /// are found. + void iterate(); + + /// getRecentPositive - Return an array of bundles that became positive during + /// the previous call to scanActiveBundles or iterate. + ArrayRef<unsigned> getRecentPositive() { return RecentPositive; } + + /// finish - Compute the optimal spill code placement given the + /// constraints. No MustSpill constraints will be violated, and the smallest + /// possible number of PrefX constraints will be violated, weighted by + /// expected execution frequencies. + /// The selected bundles are returned in the bitvector passed to prepare(). + /// @return True if a perfect solution was found, allowing the variable to be + /// in a register through all relevant bundles. + bool finish(); + + /// getBlockFrequency - Return the estimated block execution frequency per + /// function invocation. + BlockFrequency getBlockFrequency(unsigned Number) const { + return BlockFrequencies[Number]; + } + +private: + bool runOnMachineFunction(MachineFunction&) override; + void getAnalysisUsage(AnalysisUsage&) const override; + void releaseMemory() override; + + void activate(unsigned); + void setThreshold(const BlockFrequency &Entry); +}; + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/CodeGen/Spiller.h b/contrib/llvm/lib/CodeGen/Spiller.h new file mode 100644 index 0000000..08f99ec --- /dev/null +++ b/contrib/llvm/lib/CodeGen/Spiller.h @@ -0,0 +1,42 @@ +//===-- llvm/CodeGen/Spiller.h - Spiller -*- C++ -*------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SPILLER_H +#define LLVM_LIB_CODEGEN_SPILLER_H + +namespace llvm { + + class LiveRangeEdit; + class MachineFunction; + class MachineFunctionPass; + class VirtRegMap; + + /// Spiller interface. + /// + /// Implementations are utility classes which insert spill or remat code on + /// demand. + class Spiller { + virtual void anchor(); + public: + virtual ~Spiller() = 0; + + /// spill - Spill the LRE.getParent() live interval. + virtual void spill(LiveRangeEdit &LRE) = 0; + + }; + + /// Create and return a spiller that will insert spill code directly instead + /// of deferring though VirtRegMap. 
+ Spiller *createInlineSpiller(MachineFunctionPass &pass, + MachineFunction &mf, + VirtRegMap &vrm); + +} + +#endif diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp new file mode 100644 index 0000000..51dddab --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -0,0 +1,1411 @@ +//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. +// +//===----------------------------------------------------------------------===// + +#include "SplitKit.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumFinished, "Number of splits finished"); +STATISTIC(NumSimple, "Number of splits that were simple"); +STATISTIC(NumCopies, "Number of copies inserted for splitting"); +STATISTIC(NumRemats, "Number of rematerialized defs for splitting"); +STATISTIC(NumRepairs, "Number of invalid live ranges repaired"); + +//===----------------------------------------------------------------------===// +// Split Analysis +//===----------------------------------------------------------------------===// + +SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, + const MachineLoopInfo &mli) + : MF(vrm.getMachineFunction()), VRM(vrm), LIS(lis), Loops(mli), + TII(*MF.getSubtarget().getInstrInfo()), CurLI(nullptr), + LastSplitPoint(MF.getNumBlockIDs()) {} + +void SplitAnalysis::clear() { + UseSlots.clear(); + UseBlocks.clear(); + ThroughBlocks.clear(); + CurLI = nullptr; + DidRepairRange = false; +} + +SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { + const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + // FIXME: Handle multiple EH pad successors. + const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); + std::pair<SlotIndex, SlotIndex> &LSP = LastSplitPoint[Num]; + SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB); + + // Compute split points on the first call. The pair is independent of the + // current live interval. + if (!LSP.first.isValid()) { + MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); + if (FirstTerm == MBB->end()) + LSP.first = MBBEnd; + else + LSP.first = LIS.getInstructionIndex(FirstTerm); + + // If there is a landing pad successor, also find the call instruction. + if (!LPad) + return LSP.first; + // There may not be a call instruction (?) in which case we ignore LPad. + LSP.second = LSP.first; + for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin(); + I != E;) { + --I; + if (I->isCall()) { + LSP.second = LIS.getInstructionIndex(I); + break; + } + } + } + + // If CurLI is live into a landing pad successor, move the last split point + // back to the call that may throw. 
+ if (!LPad || !LSP.second || !LIS.isLiveInToMBB(*CurLI, LPad)) + return LSP.first; + + // Find the value leaving MBB. + const VNInfo *VNI = CurLI->getVNInfoBefore(MBBEnd); + if (!VNI) + return LSP.first; + + // If the value leaving MBB was defined after the call in MBB, it can't + // really be live-in to the landing pad. This can happen if the landing pad + // has a PHI, and this register is undef on the exceptional edge. + // <rdar://problem/10664933> + if (!SlotIndex::isEarlierInstr(VNI->def, LSP.second) && VNI->def < MBBEnd) + return LSP.first; + + // Value is properly live-in to the landing pad. + // Only allow splits before the call. + return LSP.second; +} + +MachineBasicBlock::iterator +SplitAnalysis::getLastSplitPointIter(MachineBasicBlock *MBB) { + SlotIndex LSP = getLastSplitPoint(MBB->getNumber()); + if (LSP == LIS.getMBBEndIdx(MBB)) + return MBB->end(); + return LIS.getInstructionFromIndex(LSP); +} + +/// analyzeUses - Count instructions, basic blocks, and loops using CurLI. +void SplitAnalysis::analyzeUses() { + assert(UseSlots.empty() && "Call clear first"); + + // First get all the defs from the interval values. This provides the correct + // slots for early clobbers. + for (const VNInfo *VNI : CurLI->valnos) + if (!VNI->isPHIDef() && !VNI->isUnused()) + UseSlots.push_back(VNI->def); + + // Get use slots form the use-def chain. + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (MachineOperand &MO : MRI.use_nodbg_operands(CurLI->reg)) + if (!MO.isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(MO.getParent()).getRegSlot()); + + array_pod_sort(UseSlots.begin(), UseSlots.end()); + + // Remove duplicates, keeping the smaller slot for each instruction. + // That is what we want for early clobbers. + UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(), + SlotIndex::isSameInstr), + UseSlots.end()); + + // Compute per-live block info. + if (!calcLiveBlockInfo()) { + // FIXME: calcLiveBlockInfo found inconsistencies in the live range. + // I am looking at you, RegisterCoalescer! + DidRepairRange = true; + ++NumRepairs; + DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); + const_cast<LiveIntervals&>(LIS) + .shrinkToUses(const_cast<LiveInterval*>(CurLI)); + UseBlocks.clear(); + ThroughBlocks.clear(); + bool fixed = calcLiveBlockInfo(); + (void)fixed; + assert(fixed && "Couldn't fix broken live interval"); + } + + DEBUG(dbgs() << "Analyze counted " + << UseSlots.size() << " instrs in " + << UseBlocks.size() << " blocks, through " + << NumThroughBlocks << " blocks.\n"); +} + +/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks +/// where CurLI is live. +bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = NumGapBlocks = 0; + if (CurLI->empty()) + return true; + + LiveInterval::const_iterator LVI = CurLI->begin(); + LiveInterval::const_iterator LVE = CurLI->end(); + + SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE; + UseI = UseSlots.begin(); + UseE = UseSlots.end(); + + // Loop over basic blocks where CurLI is live. + MachineFunction::iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); + for (;;) { + BlockInfo BI; + BI.MBB = &*MFI; + SlotIndex Start, Stop; + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + // If the block contains no uses, the range must be live through. At one + // point, RegisterCoalescer could create dangling ranges that ended + // mid-block. 
+ if (UseI == UseE || *UseI >= Stop) { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + // The range shouldn't end mid-block if there are no uses. This shouldn't + // happen. + if (LVI->end < Stop) + return false; + } else { + // This block has uses. Find the first and last uses in the block. + BI.FirstInstr = *UseI; + assert(BI.FirstInstr >= Start); + do ++UseI; + while (UseI != UseE && *UseI < Stop); + BI.LastInstr = UseI[-1]; + assert(BI.LastInstr < Stop); + + // LVI is the first live segment overlapping MBB. + BI.LiveIn = LVI->start <= Start; + + // When not live in, the first use should be a def. + if (!BI.LiveIn) { + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); + assert(LVI->start == BI.FirstInstr && "First instr should be a def"); + BI.FirstDef = BI.FirstInstr; + } + + // Look for gaps in the live range. + BI.LiveOut = true; + while (LVI->end < Stop) { + SlotIndex LastStop = LVI->end; + if (++LVI == LVE || LVI->start >= Stop) { + BI.LiveOut = false; + BI.LastInstr = LastStop; + break; + } + + if (LastStop < LVI->start) { + // There is a gap in the live range. Create duplicate entries for the + // live-in snippet and the live-out snippet. + ++NumGapBlocks; + + // Push the Live-in part. + BI.LiveOut = false; + UseBlocks.push_back(BI); + UseBlocks.back().LastInstr = LastStop; + + // Set up BI for the live-out part. + BI.LiveIn = false; + BI.LiveOut = true; + BI.FirstInstr = BI.FirstDef = LVI->start; + } + + // A Segment that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); + if (!BI.FirstDef) + BI.FirstDef = LVI->start; + } + + UseBlocks.push_back(BI); + + // LVI is now at LVE or LVI->end >= Stop. + if (LVI == LVE) + break; + } + + // Live segment ends exactly at Stop. Move to the next segment. + if (LVI->end == Stop && ++LVI == LVE) + break; + + // Pick the next basic block. + if (LVI->start < Stop) + ++MFI; + else + MFI = LIS.getMBBFromIndex(LVI->start)->getIterator(); + } + + assert(getNumLiveBlocks() == countLiveBlocks(CurLI) && "Bad block count"); + return true; +} + +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast<LiveInterval*>(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = + LIS.getMBBFromIndex(LVI->start)->getIterator(); + SlotIndex Stop = LIS.getMBBEndIdx(&*MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(&*MFI); + } while (Stop <= LVI->start); + } +} + +bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { + unsigned OrigReg = VRM.getOriginal(CurLI->reg); + const LiveInterval &Orig = LIS.getInterval(OrigReg); + assert(!Orig.empty() && "Splitting empty interval?"); + LiveInterval::const_iterator I = Orig.find(Idx); + + // Range containing Idx should begin at Idx. + if (I != Orig.end() && I->start <= Idx) + return I->start == Idx; + + // Range does not contain Idx, previous must end at Idx. 
+ return I != Orig.begin() && (--I)->end == Idx; +} + +void SplitAnalysis::analyze(const LiveInterval *li) { + clear(); + CurLI = li; + analyzeUses(); +} + + +//===----------------------------------------------------------------------===// +// Split Editor +//===----------------------------------------------------------------------===// + +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. +SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, + MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi) + : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), TII(*vrm.getMachineFunction().getSubtarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getSubtarget().getRegisterInfo()), + MBFI(mbfi), Edit(nullptr), OpenIdx(0), SpillMode(SM_Partition), + RegAssign(Allocator) {} + +void SplitEditor::reset(LiveRangeEdit &LRE, ComplementSpillMode SM) { + Edit = &LRE; + SpillMode = SM; + OpenIdx = 0; + RegAssign.clear(); + Values.clear(); + + // Reset the LiveRangeCalc instances needed for this spill mode. + LRCalc[0].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + if (SpillMode) + LRCalc[1].reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT, + &LIS.getVNInfoAllocator()); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(nullptr); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; +} +#endif + +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { + assert(ParentVNI && "Mapping NULL value"); + assert(Idx.isValid() && "Invalid SlotIndex"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + + // Create a new value. + VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); + + // Use insert for lookup, so we can add missing values with a second lookup. + std::pair<ValueMap::iterator, bool> InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), + ValueForcePair(VNI, false))); + + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; + + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second.getPointer()) { + SlotIndex Def = OldVNI->def; + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); + // No longer a simple mapping. Switch to a complex, non-forced mapping. + InsP.first->second = ValueForcePair(); + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + + return VNI; +} + +void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { + assert(ParentVNI && "Mapping NULL value"); + ValueForcePair &VFP = Values[std::make_pair(RegIdx, ParentVNI->id)]; + VNInfo *VNI = VFP.getPointer(); + + // ParentVNI was either unmapped or already complex mapped. Either way, just + // set the force bit. + if (!VNI) { + VFP.setInt(true); + return; + } + + // This was previously a single mapping. 
Make sure the old def is represented + // by a trivial live range. + SlotIndex Def = VNI->def; + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); + // Mark as complex mapped, forced. + VFP = ValueForcePair(nullptr, true); +} + +VNInfo *SplitEditor::defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I) { + MachineInstr *CopyMI = nullptr; + SlotIndex Def; + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; + + // Attempt cheap-as-a-copy rematerialization. + LiveRangeEdit::Remat RM(ParentVNI); + if (Edit->canRematerializeAt(RM, UseIdx, true)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late); + ++NumRemats; + } else { + // Can't remat, just insert a copy from parent. + CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) + .addReg(Edit->getReg()); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getRegSlot(); + ++NumCopies; + } + + // Define the value in Reg. + return defValue(RegIdx, ParentVNI, Def); +} + +/// Create a new virtual register and live interval. +unsigned SplitEditor::openIntv() { + // Create the complement as index 0. + if (Edit->empty()) + Edit->createEmptyInterval(); + + // Create the open interval. + OpenIdx = Edit->size(); + Edit->createEmptyInterval(); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n'); + OpenIdx = Idx; +} + +SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvBefore"); + DEBUG(dbgs() << " enterIntvBefore " << Idx); + Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvBefore called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before enterIntvAfter"); + DEBUG(dbgs() << " enterIntvAfter " << Idx); + Idx = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx; + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "enterIntvAfter called with invalid index"); + + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), + std::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + +SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before enterIntvAtEnd"); + SlotIndex End = LIS.getMBBEndIdx(&MBB); + SlotIndex Last = End.getPrevSlot(); + DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return End; + } + 
DEBUG(dbgs() << ": valno " << ParentVNI->id); + VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, + SA.getLastSplitPointIter(&MBB)); + RegAssign.insert(VNI->def, End, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +/// useIntv - indicate that all instructions in MBB should use OpenLI. +void SplitEditor::useIntv(const MachineBasicBlock &MBB) { + useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB)); +} + +void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before useIntv"); + DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvAfter"); + DEBUG(dbgs() << " leaveIntvAfter " << Idx); + + // The interval must be live beyond the instruction at Idx. + SlotIndex Boundary = Idx.getBoundaryIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Boundary); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Boundary.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + MachineInstr *MI = LIS.getInstructionFromIndex(Boundary); + assert(MI && "No instruction at index"); + + // In spill mode, make live ranges as short as possible by inserting the copy + // before MI. This is only possible if that instruction doesn't redefine the + // value. The inserted COPY is not a kill, and we don't need to recompute + // the source live range. The spiller also won't try to hoist this copy. + if (SpillMode && !SlotIndex::isSameInstr(ParentVNI->def, Idx) && + MI->readsVirtualRegister(Edit->getReg())) { + forceRecompute(0, ParentVNI); + defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return Idx; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Boundary, *MI->getParent(), + std::next(MachineBasicBlock::iterator(MI))); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { + assert(OpenIdx && "openIntv not called before leaveIntvBefore"); + DEBUG(dbgs() << " leaveIntvBefore " << Idx); + + // The interval must be live into the instruction at Idx. 
+ Idx = Idx.getBaseIndex(); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Idx.getNextSlot(); + } + DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n'); + + MachineInstr *MI = LIS.getInstructionFromIndex(Idx); + assert(MI && "No instruction at index"); + VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI); + return VNI->def; +} + +SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { + assert(OpenIdx && "openIntv not called before leaveIntvAtTop"); + SlotIndex Start = LIS.getMBBStartIdx(&MBB); + DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); + + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + if (!ParentVNI) { + DEBUG(dbgs() << ": not live\n"); + return Start; + } + + VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB, + MBB.SkipPHIsAndLabels(MBB.begin())); + RegAssign.insert(Start, VNI->def, OpenIdx); + DEBUG(dump()); + return VNI->def; +} + +void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { + assert(OpenIdx && "openIntv not called before overlapIntv"); + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && + "Parent changes value in extended range"); + assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && + "Range cannot span basic blocks"); + + // The complement interval will be extended as needed by LRCalc.extend(). + if (ParentVNI) + forceRecompute(0, ParentVNI); + DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); + RegAssign.insert(Start, End, OpenIdx); + DEBUG(dump()); +} + +//===----------------------------------------------------------------------===// +// Spill modes +//===----------------------------------------------------------------------===// + +void SplitEditor::removeBackCopies(SmallVectorImpl<VNInfo*> &Copies) { + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); + DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); + RegAssignMap::iterator AssignI; + AssignI.setMap(RegAssign); + + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + SlotIndex Def = Copies[i]->def; + MachineInstr *MI = LIS.getInstructionFromIndex(Def); + assert(MI && "No instruction for back-copy"); + + MachineBasicBlock *MBB = MI->getParent(); + MachineBasicBlock::iterator MBBI(MI); + bool AtBegin; + do AtBegin = MBBI == MBB->begin(); + while (!AtBegin && (--MBBI)->isDebugValue()); + + DEBUG(dbgs() << "Removing " << Def << '\t' << *MI); + LIS.removeVRegDefAt(*LI, Def); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + + // Adjust RegAssign if a register assignment is killed at Def. We want to + // avoid calculating the live range of the source register if possible. + AssignI.find(Def.getPrevSlot()); + if (!AssignI.valid() || AssignI.start() >= Def) + continue; + // If MI doesn't kill the assigned register, just leave it. 
+ if (AssignI.stop() != Def) + continue; + unsigned RegIdx = AssignI.value(); + if (AtBegin || !MBBI->readsVirtualRegister(Edit->getReg())) { + DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); + forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); + } else { + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); + DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); + AssignI.setStop(Kill); + } + } +} + +MachineBasicBlock* +SplitEditor::findShallowDominator(MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB) { + if (MBB == DefMBB) + return MBB; + assert(MDT.dominates(DefMBB, MBB) && "MBB must be dominated by the def."); + + const MachineLoopInfo &Loops = SA.Loops; + const MachineLoop *DefLoop = Loops.getLoopFor(DefMBB); + MachineDomTreeNode *DefDomNode = MDT[DefMBB]; + + // Best candidate so far. + MachineBasicBlock *BestMBB = MBB; + unsigned BestDepth = UINT_MAX; + + for (;;) { + const MachineLoop *Loop = Loops.getLoopFor(MBB); + + // MBB isn't in a loop, it doesn't get any better. All dominators have a + // higher frequency by definition. + if (!Loop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth 0\n"); + return MBB; + } + + // We'll never be able to exit the DefLoop. + if (Loop == DefLoop) { + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " in the same loop\n"); + return MBB; + } + + // Least busy dominator seen so far. + unsigned Depth = Loop->getLoopDepth(); + if (Depth < BestDepth) { + BestMBB = MBB; + BestDepth = Depth; + DEBUG(dbgs() << "Def in BB#" << DefMBB->getNumber() << " dominates BB#" + << MBB->getNumber() << " at depth " << Depth << '\n'); + } + + // Leave loop by going to the immediate dominator of the loop header. + // This is a bigger stride than simply walking up the dominator tree. + MachineDomTreeNode *IDom = MDT[Loop->getHeader()]->getIDom(); + + // Too far up the dominator tree? + if (!IDom || !MDT.dominates(DefDomNode, IDom)) + return BestMBB; + + MBB = IDom->getBlock(); + } +} + +void SplitEditor::hoistCopiesForSize() { + // Get the complement interval, always RegIdx 0. + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); + LiveInterval *Parent = &Edit->getParent(); + + // Track the nearest common dominator for all back-copies for each ParentVNI, + // indexed by ParentVNI->id. + typedef std::pair<MachineBasicBlock*, SlotIndex> DomPair; + SmallVector<DomPair, 8> NearestDom(Parent->getNumValNums()); + + // Find the nearest common dominator for parent values with multiple + // back-copies. If a single back-copy dominates, put it in DomPair.second. + for (VNInfo *VNI : LI->valnos) { + if (VNI->isUnused()) + continue; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + assert(ParentVNI && "Parent not live at complement def"); + + // Don't hoist remats. The complement is probably going to disappear + // completely anyway. + if (Edit->didRematerialize(ParentVNI)) + continue; + + MachineBasicBlock *ValMBB = LIS.getMBBFromIndex(VNI->def); + DomPair &Dom = NearestDom[ParentVNI->id]; + + // Keep directly defined parent values. This is either a PHI or an + // instruction in the complement range. All other copies of ParentVNI + // should be eliminated. + if (VNI->def == ParentVNI->def) { + DEBUG(dbgs() << "Direct complement def at " << VNI->def << '\n'); + Dom = DomPair(ValMBB, VNI->def); + continue; + } + // Skip the singly mapped values. There is nothing to gain from hoisting a + // single back-copy. 
+ if (Values.lookup(std::make_pair(0, ParentVNI->id)).getPointer()) { + DEBUG(dbgs() << "Single complement def at " << VNI->def << '\n'); + continue; + } + + if (!Dom.first) { + // First time we see ParentVNI. VNI dominates itself. + Dom = DomPair(ValMBB, VNI->def); + } else if (Dom.first == ValMBB) { + // Two defs in the same block. Pick the earlier def. + if (!Dom.second.isValid() || VNI->def < Dom.second) + Dom.second = VNI->def; + } else { + // Different basic blocks. Check if one dominates. + MachineBasicBlock *Near = + MDT.findNearestCommonDominator(Dom.first, ValMBB); + if (Near == ValMBB) + // Def ValMBB dominates. + Dom = DomPair(ValMBB, VNI->def); + else if (Near != Dom.first) + // None dominate. Hoist to common dominator, need new def. + Dom = DomPair(Near, SlotIndex()); + } + + DEBUG(dbgs() << "Multi-mapped complement " << VNI->id << '@' << VNI->def + << " for parent " << ParentVNI->id << '@' << ParentVNI->def + << " hoist to BB#" << Dom.first->getNumber() << ' ' + << Dom.second << '\n'); + } + + // Insert the hoisted copies. + for (unsigned i = 0, e = Parent->getNumValNums(); i != e; ++i) { + DomPair &Dom = NearestDom[i]; + if (!Dom.first || Dom.second.isValid()) + continue; + // This value needs a hoisted copy inserted at the end of Dom.first. + VNInfo *ParentVNI = Parent->getValNumInfo(i); + MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(ParentVNI->def); + // Get a less loopy dominator than Dom.first. + Dom.first = findShallowDominator(Dom.first, DefMBB); + SlotIndex Last = LIS.getMBBEndIdx(Dom.first).getPrevSlot(); + Dom.second = + defFromParent(0, ParentVNI, Last, *Dom.first, + SA.getLastSplitPointIter(Dom.first))->def; + } + + // Remove redundant back-copies that are now known to be dominated by another + // def with the same value. + SmallVector<VNInfo*, 8> BackCopies; + for (VNInfo *VNI : LI->valnos) { + if (VNI->isUnused()) + continue; + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(VNI->def); + const DomPair &Dom = NearestDom[ParentVNI->id]; + if (!Dom.first || Dom.second == VNI->def) + continue; + BackCopies.push_back(VNI); + forceRecompute(0, ParentVNI); + } + removeBackCopies(BackCopies); +} + + +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need LRCalc.extend(). +bool SplitEditor::transferValues() { + bool Skipped = false; + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (const LiveRange::Segment &S : Edit->getParent()) { + DEBUG(dbgs() << " blit " << S << ':'); + VNInfo *ParentVNI = S.valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = S.start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = S.end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + + // Check for a simply defined value that can be blitted directly. + ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); + if (VNInfo *VNI = VFP.getPointer()) { + DEBUG(dbgs() << ':' << VNI->id); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); + Start = End; + continue; + } + + // Skip values with forced recomputation. 
+ if (VFP.getInt()) { + DEBUG(dbgs() << "(recalc)"); + Skipped = true; + Start = End; + continue; + } + + LiveRangeCalc &LRC = getLRCalc(RegIdx); + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator(); + SlotIndex BlockStart, BlockEnd; + std::tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(&*MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) + LRC.setLiveOutValue(&*MBB, VNI); + + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(&*MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) + LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well. + } else { + // This block needs a live-in value. The last block covered may not + // be live-out. + if (End < BlockEnd) + LRC.addLiveInBlock(LR, MDT[&*MBB], End); + else { + // Live-through, and we don't know the value. + LRC.addLiveInBlock(LR, MDT[&*MBB]); + LRC.setLiveOutValue(&*MBB, nullptr); + } + } + BlockStart = BlockEnd; + ++MBB; + } + Start = End; + } while (Start != S.end); + DEBUG(dbgs() << '\n'); + } + + LRCalc[0].calculateValues(); + if (SpillMode) + LRCalc[1].calculateValues(); + + return Skipped; +} + +void SplitEditor::extendPHIKillRanges() { + // Extend live ranges to be live-out for successor PHI values. + for (const VNInfo *PHIVNI : Edit->getParent().valnos) { + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); + LiveRangeCalc &LRC = getLRCalc(RegIdx); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI); + SlotIndex LastUse = End.getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (Edit->getParent().liveAt(LastUse)) { + assert(RegAssign.lookup(LastUse) == RegIdx && + "Different register assignment in phi predecessor"); + LRC.extend(LR, End); + } + } + } +} + +/// rewriteAssigned - Rewrite all uses of Edit->getReg(). +void SplitEditor::rewriteAssigned(bool ExtendRanges) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = *RI; + MachineInstr *MI = MO.getParent(); + ++RI; + // LiveDebugVariables should have handled all DBG_VALUE instructions. 
+ if (MI->isDebugValue()) { + DEBUG(dbgs() << "Zapping " << *MI); + MO.setReg(0); + continue; + } + + // <undef> operands don't really read the register, so it doesn't matter + // which register we choose. When the use operand is tied to a def, we must + // use the same register as the def, so just do that always. + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (MO.isDef() || MO.isUndef()) + Idx = Idx.getRegSlot(MO.isEarlyClobber()); + + // Rewrite to the mapped register at Idx. + unsigned RegIdx = RegAssign.lookup(Idx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + MO.setReg(LI->reg); + DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' + << Idx << ':' << RegIdx << '\t' << *MI); + + // Extend liveness to Idx if the instruction reads reg. + if (!ExtendRanges || MO.isUndef()) + continue; + + // Skip instructions that don't read Reg. + if (MO.isDef()) { + if (!MO.getSubReg() && !MO.isEarlyClobber()) + continue; + // We may wan't to extend a live range for a partial redef, or for a use + // tied to an early clobber. + Idx = Idx.getPrevSlot(); + if (!Edit->getParent().liveAt(Idx)) + continue; + } else + Idx = Idx.getRegSlot(true); + + getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); + } +} + +void SplitEditor::deleteRematVictims() { + SmallVector<MachineInstr*, 8> Dead; + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ + LiveInterval *LI = &LIS.getInterval(*I); + for (const LiveRange::Segment &S : LI->segments) { + // Dead defs end at the dead slot. + if (S.end != S.valno->def.getDeadSlot()) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(S.valno->def); + assert(MI && "Missing instruction for dead def"); + MI->addRegisterDead(LI->reg, &TRI); + + if (!MI->allDefsAreDead()) + continue; + + DEBUG(dbgs() << "All defs dead: " << *MI); + Dead.push_back(MI); + } + } + + if (Dead.empty()) + return; + + Edit->eliminateDeadDefs(Dead); +} + +void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) { + ++NumFinished; + + // At this point, the live intervals in Edit contain VNInfos corresponding to + // the inserted copies. + + // Add the original defs from the parent interval. + for (const VNInfo *ParentVNI : Edit->getParent().valnos) { + if (ParentVNI->isUnused()) + continue; + unsigned RegIdx = RegAssign.lookup(ParentVNI->def); + defValue(RegIdx, ParentVNI, ParentVNI->def); + + // Force rematted values to be recomputed everywhere. + // The new live ranges may be truncated. + if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + forceRecompute(i, ParentVNI); + } + + // Hoist back-copies to the complement interval when in spill mode. + switch (SpillMode) { + case SM_Partition: + // Leave all back-copies as is. + break; + case SM_Size: + hoistCopiesForSize(); + break; + case SM_Speed: + llvm_unreachable("Spill mode 'speed' not implemented yet"); + } + + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) + extendPHIKillRanges(); + else + ++NumSimple; + + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); + + // Delete defs that were rematted everywhere. + if (Skipped) + deleteRematVictims(); + + // Get rid of unused values and set phi-kill flags. + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + LI.RenumberValues(); + } + + // Provide a reverse mapping from original indices to Edit ranges. 
+ if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + + // Now check if any registers were separated into multiple components. + ConnectedVNInfoEqClasses ConEQ(LIS); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { + // Don't use iterators, they are invalidated by create() below. + unsigned VReg = Edit->get(i); + LiveInterval &LI = LIS.getInterval(VReg); + SmallVector<LiveInterval*, 8> SplitLIs; + LIS.splitSeparateComponents(LI, SplitLIs); + unsigned Original = VRM.getOriginal(VReg); + for (LiveInterval *SplitLI : SplitLIs) + VRM.setIsSplitFromReg(SplitLI->reg, Original); + + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); + } + + // Calculate spill weight and allocation hints for new intervals. + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); + + assert(!LRMap || LRMap->size() == Edit->size()); +} + + +//===----------------------------------------------------------------------===// +// Single Block Splitting +//===----------------------------------------------------------------------===// + +bool SplitAnalysis::shouldSplitSingleBlock(const BlockInfo &BI, + bool SingleInstrs) const { + // Always split for multiple instructions. + if (!BI.isOneInstr()) + return true; + // Don't split for single instructions unless explicitly requested. + if (!SingleInstrs) + return false; + // Splitting a live-through range always makes progress. + if (BI.LiveIn && BI.LiveOut) + return true; + // No point in isolating a copy. It has no register class constraints. + if (LIS.getInstructionFromIndex(BI.FirstInstr)->isCopyLike()) + return false; + // Finally, don't isolate an end point that was created by earlier splits. + return isOriginalEndpoint(BI.FirstInstr); +} + +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstInstr, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastInstr < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastInstr)); + } else { + // The last use is after the last valid split point. + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastInstr); + } +} + + +//===----------------------------------------------------------------------===// +// Global Live Range Splitting Support +//===----------------------------------------------------------------------===// + +// These methods support a method of global live range splitting that uses a +// global algorithm to decide intervals for CFG edges. They will insert split +// points and color intervals in basic blocks while avoiding interference. +// +// Note that splitSingleBlock is also useful for blocks where both CFG edges +// are on the stack. 
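[Editorial note, not part of the imported source: the comment block above describes how a global splitter is expected to drive these entry points. The sketch below is illustrative only; the driver name SplitRegionDriver and the way the block list is obtained are hypothetical stand-ins, and in LLVM proper this role is played by the greedy register allocator's region-splitting logic.]

// Sketch only (not in this patch): drive SplitEditor over a set of blocks
// where the current register is known to be live through (for example, the
// block numbers reported by SplitAnalysis::getThroughBlocks()). Assumes
// LREdit was created for the register being split and SE was constructed
// from the matching SplitAnalysis.
static void SplitRegionDriver(SplitEditor &SE, LiveRangeEdit &LREdit,
                              ArrayRef<unsigned> ThroughBlocks) {
  SE.reset(LREdit, SplitEditor::SM_Partition);

  // Interval 0 is the implicit complement; this first openIntv() returns 1.
  unsigned RegIntv = SE.openIntv();

  // Keep the value in RegIntv across every live-through block. Passing
  // invalid SlotIndexes means there is no interference to steer around.
  for (unsigned MBBNum : ThroughBlocks)
    SE.splitLiveThroughBlock(MBBNum, RegIntv, SlotIndex(),
                             RegIntv, SlotIndex());

  // Compute the complement, rewrite the uses, and map the new live ranges
  // back to the openIntv() indices they came from.
  SmallVector<unsigned, 8> IntvMap;
  SE.finish(&IntvMap);
}

A real driver would also call splitRegInBlock/splitRegOutBlock (defined below) for blocks that only enter or only leave the region in a register, passing the interference points it wants to avoid.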
+ +void SplitEditor::splitLiveThroughBlock(unsigned MBBNum, + unsigned IntvIn, SlotIndex LeaveBefore, + unsigned IntvOut, SlotIndex EnterAfter){ + SlotIndex Start, Stop; + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum); + + DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop + << ") intf " << LeaveBefore << '-' << EnterAfter + << ", live-through " << IntvIn << " -> " << IntvOut); + + assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks"); + + assert((!LeaveBefore || LeaveBefore < Stop) && "Interference after block"); + assert((!IntvIn || !LeaveBefore || LeaveBefore > Start) && "Impossible intf"); + assert((!EnterAfter || EnterAfter >= Start) && "Interference before block"); + + MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum); + + if (!IntvOut) { + DEBUG(dbgs() << ", spill on entry.\n"); + // + // <<<<<<<<< Possible LeaveBefore interference. + // |-----------| Live through. + // -____________ Spill on entry. + // + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAtTop(*MBB); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + (void)Idx; + return; + } + + if (!IntvIn) { + DEBUG(dbgs() << ", reload on exit.\n"); + // + // >>>>>>> Possible EnterAfter interference. + // |-----------| Live through. + // ___________-- Reload on exit. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAtEnd(*MBB); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + (void)Idx; + return; + } + + if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) { + DEBUG(dbgs() << ", straight through.\n"); + // + // |-----------| Live through. + // ------------- Straight through, same intv, no interference. + // + selectIntv(IntvOut); + useIntv(Start, Stop); + return; + } + + // We cannot legally insert splits after LSP. + SlotIndex LSP = SA.getLastSplitPoint(MBBNum); + assert((!IntvOut || !EnterAfter || EnterAfter < LSP) && "Impossible intf"); + + if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter || + LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) { + DEBUG(dbgs() << ", switch avoiding interference.\n"); + // + // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ------======= Switch intervals between interference. + // + selectIntv(IntvOut); + SlotIndex Idx; + if (LeaveBefore && LeaveBefore < LSP) { + Idx = enterIntvBefore(LeaveBefore); + useIntv(Idx, Stop); + } else { + Idx = enterIntvAtEnd(*MBB); + } + selectIntv(IntvIn); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + DEBUG(dbgs() << ", create local intv for interference.\n"); + // + // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference. + // |-----------| Live through. + // ==---------== Switch intervals before/after interference. 
+ // + assert(LeaveBefore <= EnterAfter && "Missed case"); + + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + selectIntv(IntvIn); + Idx = leaveIntvBefore(LeaveBefore); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); +} + + +void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvIn, SlotIndex LeaveBefore) { + SlotIndex Start, Stop; + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr + << ", reg-in " << IntvIn << ", leave before " << LeaveBefore + << (BI.LiveOut ? ", stack-out" : ", killed in block")); + + assert(IntvIn && "Must have register in"); + assert(BI.LiveIn && "Must be live-in"); + assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference"); + + if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastInstr)) { + DEBUG(dbgs() << " before interference.\n"); + // + // <<< Interference after kill. + // |---o---x | Killed in block. + // ========= Use IntvIn everywhere. + // + selectIntv(IntvIn); + useIntv(Start, BI.LastInstr); + return; + } + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + if (!LeaveBefore || LeaveBefore > BI.LastInstr.getBoundaryIndex()) { + // + // <<< Possible interference after last use. + // |---o---o---| Live-out on stack. + // =========____ Leave IntvIn after last use. + // + // < Interference after last use. + // |---o---o--o| Live-out on stack, late last use. + // ============ Copy to stack after LSP, overlap IntvIn. + // \_____ Stack interval is live-out. + // + if (BI.LastInstr < LSP) { + DEBUG(dbgs() << ", spill after last use before interference.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvAfter(BI.LastInstr); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } else { + DEBUG(dbgs() << ", spill before last split point.\n"); + selectIntv(IntvIn); + SlotIndex Idx = leaveIntvBefore(LSP); + overlapIntv(Idx, BI.LastInstr); + useIntv(Start, Idx); + assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference"); + } + return; + } + + // The interference is overlapping somewhere we wanted to use IntvIn. That + // means we need to create a local interval that can be allocated a + // different register. + unsigned LocalIntv = openIntv(); + (void)LocalIntv; + DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n"); + + if (!BI.LiveOut || BI.LastInstr < LSP) { + // + // <<<<<<< Interference overlapping uses. + // |---o---o---| Live-out on stack. + // =====----____ Leave IntvIn before interference, then spill. + // + SlotIndex To = leaveIntvAfter(BI.LastInstr); + SlotIndex From = enterIntvBefore(LeaveBefore); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); + return; + } + + // <<<<<<< Interference overlapping uses. + // |---o---o--o| Live-out on stack, late last use. + // =====------- Copy to stack before LSP, overlap LocalIntv. + // \_____ Stack interval is live-out. 
+ // + SlotIndex To = leaveIntvBefore(LSP); + overlapIntv(To, BI.LastInstr); + SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore)); + useIntv(From, To); + selectIntv(IntvIn); + useIntv(Start, From); + assert((!LeaveBefore || From <= LeaveBefore) && "Interference"); +} + +void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI, + unsigned IntvOut, SlotIndex EnterAfter) { + SlotIndex Start, Stop; + std::tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); + + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop + << "), uses " << BI.FirstInstr << '-' << BI.LastInstr + << ", reg-out " << IntvOut << ", enter after " << EnterAfter + << (BI.LiveIn ? ", stack-in" : ", defined in block")); + + SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber()); + + assert(IntvOut && "Must have register out"); + assert(BI.LiveOut && "Must be live-out"); + assert((!EnterAfter || EnterAfter < LSP) && "Bad interference"); + + if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstInstr)) { + DEBUG(dbgs() << " after interference.\n"); + // + // >>>> Interference before def. + // | o---o---| Defined in block. + // ========= Use IntvOut everywhere. + // + selectIntv(IntvOut); + useIntv(BI.FirstInstr, Stop); + return; + } + + if (!EnterAfter || EnterAfter < BI.FirstInstr.getBaseIndex()) { + DEBUG(dbgs() << ", reload after interference.\n"); + // + // >>>> Interference before def. + // |---o---o---| Live-through, stack-in. + // ____========= Enter IntvOut before first use. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstInstr)); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + return; + } + + // The interference is overlapping somewhere we wanted to use IntvOut. That + // means we need to create a local interval that can be allocated a + // different register. + DEBUG(dbgs() << ", interference overlaps uses.\n"); + // + // >>>>>>> Interference overlapping uses. + // |---o---o---| Live-through, stack-in. + // ____---====== Create local interval for interference range. + // + selectIntv(IntvOut); + SlotIndex Idx = enterIntvAfter(EnterAfter); + useIntv(Idx, Stop); + assert((!EnterAfter || Idx >= EnterAfter) && "Interference"); + + openIntv(); + SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstInstr)); + useIntv(From, Idx); +} diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h new file mode 100644 index 0000000..69c65ff --- /dev/null +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -0,0 +1,471 @@ +//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the SplitAnalysis class as well as mutator functions for +// live range splitting. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H +#define LLVM_LIB_CODEGEN_SPLITKIT_H + +#include "LiveRangeCalc.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IntervalMap.h" +#include "llvm/ADT/SmallPtrSet.h" + +namespace llvm { + +class ConnectedVNInfoEqClasses; +class LiveInterval; +class LiveIntervals; +class LiveRangeEdit; +class MachineBlockFrequencyInfo; +class MachineInstr; +class MachineLoopInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class TargetRegisterInfo; +class VirtRegMap; +class VNInfo; +class raw_ostream; + +/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting +/// opportunities. +class LLVM_LIBRARY_VISIBILITY SplitAnalysis { +public: + const MachineFunction &MF; + const VirtRegMap &VRM; + const LiveIntervals &LIS; + const MachineLoopInfo &Loops; + const TargetInstrInfo &TII; + + /// Additional information about basic blocks where the current variable is + /// live. Such a block will look like one of these templates: + /// + /// 1. | o---x | Internal to block. Variable is only live in this block. + /// 2. |---x | Live-in, kill. + /// 3. | o---| Def, live-out. + /// 4. |---x o---| Live-in, kill, def, live-out. Counted by NumGapBlocks. + /// 5. |---o---o---| Live-through with uses or defs. + /// 6. |-----------| Live-through without uses. Counted by NumThroughBlocks. + /// + /// Two BlockInfo entries are created for template 4. One for the live-in + /// segment, and one for the live-out segment. These entries look as if the + /// block were split in the middle where the live range isn't live. + /// + /// Live-through blocks without any uses don't get BlockInfo entries. They + /// are simply listed in ThroughBlocks instead. + /// + struct BlockInfo { + MachineBasicBlock *MBB; + SlotIndex FirstInstr; ///< First instr accessing current reg. + SlotIndex LastInstr; ///< Last instr accessing current reg. + SlotIndex FirstDef; ///< First non-phi valno->def, or SlotIndex(). + bool LiveIn; ///< Current reg is live in. + bool LiveOut; ///< Current reg is live out. + + /// isOneInstr - Returns true when this BlockInfo describes a single + /// instruction. + bool isOneInstr() const { + return SlotIndex::isSameInstr(FirstInstr, LastInstr); + } + }; + +private: + // Current live interval. + const LiveInterval *CurLI; + + // Sorted slot indexes of using instructions. + SmallVector<SlotIndex, 8> UseSlots; + + /// LastSplitPoint - Last legal split point in each basic block in the current + /// function. The first entry is the first terminator, the second entry is the + /// last valid split point for a variable that is live in to a landing pad + /// successor. + SmallVector<std::pair<SlotIndex, SlotIndex>, 8> LastSplitPoint; + + /// UseBlocks - Blocks where CurLI has uses. + SmallVector<BlockInfo, 8> UseBlocks; + + /// NumGapBlocks - Number of duplicate entries in UseBlocks for blocks where + /// the live range has a gap. + unsigned NumGapBlocks; + + /// ThroughBlocks - Block numbers where CurLI is live through without uses. + BitVector ThroughBlocks; + + /// NumThroughBlocks - Number of live-through blocks. + unsigned NumThroughBlocks; + + /// DidRepairRange - analyze was forced to shrinkToUses(). + bool DidRepairRange; + + SlotIndex computeLastSplitPoint(unsigned Num); + + // Sumarize statistics by counting instructions using CurLI. + void analyzeUses(); + + /// calcLiveBlockInfo - Compute per-block information about CurLI. 
+ bool calcLiveBlockInfo(); + +public: + SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, + const MachineLoopInfo &mli); + + /// analyze - set CurLI to the specified interval, and analyze how it may be + /// split. + void analyze(const LiveInterval *li); + + /// didRepairRange() - Returns true if CurLI was invalid and has been repaired + /// by analyze(). This really shouldn't happen, but sometimes the coalescer + /// can create live ranges that end in mid-air. + bool didRepairRange() const { return DidRepairRange; } + + /// clear - clear all data structures so SplitAnalysis is ready to analyze a + /// new interval. + void clear(); + + /// getParent - Return the last analyzed interval. + const LiveInterval &getParent() const { return *CurLI; } + + /// getLastSplitPoint - Return the base index of the last valid split point + /// in the basic block numbered Num. + SlotIndex getLastSplitPoint(unsigned Num) { + // Inline the common simple case. + if (LastSplitPoint[Num].first.isValid() && + !LastSplitPoint[Num].second.isValid()) + return LastSplitPoint[Num].first; + return computeLastSplitPoint(Num); + } + + /// getLastSplitPointIter - Returns the last split point as an iterator. + MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock*); + + /// isOriginalEndpoint - Return true if the original live range was killed or + /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, + /// and 'use' for an early-clobber def. + /// This can be used to recognize code inserted by earlier live range + /// splitting. + bool isOriginalEndpoint(SlotIndex Idx) const; + + /// getUseSlots - Return an array of SlotIndexes of instructions using CurLI. + /// This include both use and def operands, at most one entry per instruction. + ArrayRef<SlotIndex> getUseSlots() const { return UseSlots; } + + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks + /// where CurLI has uses. + ArrayRef<BlockInfo> getUseBlocks() const { return UseBlocks; } + + /// getNumThroughBlocks - Return the number of through blocks. + unsigned getNumThroughBlocks() const { return NumThroughBlocks; } + + /// isThroughBlock - Return true if CurLI is live through MBB without uses. + bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } + + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } + + /// getNumLiveBlocks - Return the number of blocks where CurLI is live. + unsigned getNumLiveBlocks() const { + return getUseBlocks().size() - NumGapBlocks + getNumThroughBlocks(); + } + + /// countLiveBlocks - Return the number of blocks where li is live. This is + /// guaranteed to return the same number as getNumLiveBlocks() after calling + /// analyze(li). + unsigned countLiveBlocks(const LiveInterval *li) const; + + typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet; + + /// shouldSplitSingleBlock - Returns true if it would help to create a local + /// live range for the instructions in BI. There is normally no benefit to + /// creating a live range for a single instruction, but it does enable + /// register class inflation if the instruction has a restricted register + /// class. + /// + /// @param BI The block to be isolated. + /// @param SingleInstrs True when single instructions should be isolated. + bool shouldSplitSingleBlock(const BlockInfo &BI, bool SingleInstrs) const; +}; + + +/// SplitEditor - Edit machine code and LiveIntervals for live range +/// splitting. 
+/// +/// - Create a SplitEditor from a SplitAnalysis. +/// - Start a new live interval with openIntv. +/// - Mark the places where the new interval is entered using enterIntv* +/// - Mark the ranges where the new interval is used with useIntv* +/// - Mark the places where the interval is exited with exitIntv*. +/// - Finish the current interval with closeIntv and repeat from 2. +/// - Rewrite instructions with finish(). +/// +class LLVM_LIBRARY_VISIBILITY SplitEditor { + SplitAnalysis &SA; + LiveIntervals &LIS; + VirtRegMap &VRM; + MachineRegisterInfo &MRI; + MachineDominatorTree &MDT; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; + +public: + + /// ComplementSpillMode - Select how the complement live range should be + /// created. SplitEditor automatically creates interval 0 to contain + /// anything that isn't added to another interval. This complement interval + /// can get quite complicated, and it can sometimes be an advantage to allow + /// it to overlap the other intervals. If it is going to spill anyway, no + /// registers are wasted by keeping a value in two places at the same time. + enum ComplementSpillMode { + /// SM_Partition(Default) - Try to create the complement interval so it + /// doesn't overlap any other intervals, and the original interval is + /// partitioned. This may require a large number of back copies and extra + /// PHI-defs. Only segments marked with overlapIntv will be overlapping. + SM_Partition, + + /// SM_Size - Overlap intervals to minimize the number of inserted COPY + /// instructions. Copies to the complement interval are hoisted to their + /// common dominator, so only one COPY is required per value in the + /// complement interval. This also means that no extra PHI-defs need to be + /// inserted in the complement interval. + SM_Size, + + /// SM_Speed - Overlap intervals to minimize the expected execution + /// frequency of the inserted copies. This is very similar to SM_Size, but + /// the complement interval may get some extra PHI-defs. + SM_Speed + }; + +private: + + /// Edit - The current parent register and new intervals created. + LiveRangeEdit *Edit; + + /// Index into Edit of the currently open interval. + /// The index 0 is used for the complement, so the first interval started by + /// openIntv will be 1. + unsigned OpenIdx; + + /// The current spill mode, selected by reset(). + ComplementSpillMode SpillMode; + + typedef IntervalMap<SlotIndex, unsigned> RegAssignMap; + + /// Allocator for the interval map. This will eventually be shared with + /// SlotIndexes and LiveIntervals. + RegAssignMap::Allocator Allocator; + + /// RegAssign - Map of the assigned register indexes. + /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at + /// Idx. + RegAssignMap RegAssign; + + typedef PointerIntPair<VNInfo*, 1> ValueForcePair; + typedef DenseMap<std::pair<unsigned, unsigned>, ValueForcePair> ValueMap; + + /// Values - keep track of the mapping from parent values to values in the new + /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains: + /// + /// 1. No entry - the value is not mapped to Edit.get(RegIdx). + /// 2. (Null, false) - the value is mapped to multiple values in + /// Edit.get(RegIdx). Each value is represented by a minimal live range at + /// its def. The full live range can be inferred exactly from the range + /// of RegIdx in RegAssign. + /// 3. (Null, true). 
As above, but the ranges in RegAssign are too large, and + /// the live range must be recomputed using LiveRangeCalc::extend(). + /// 4. (VNI, false) The value is mapped to a single new value. + /// The new value has no live ranges anywhere. + ValueMap Values; + + /// LRCalc - Cache for computing live ranges and SSA update. Each instance + /// can only handle non-overlapping live ranges, so use a separate + /// LiveRangeCalc instance for the complement interval when in spill mode. + LiveRangeCalc LRCalc[2]; + + /// getLRCalc - Return the LRCalc to use for RegIdx. In spill mode, the + /// complement interval can overlap the other intervals, so it gets its own + /// LRCalc instance. When not in spill mode, all intervals can share one. + LiveRangeCalc &getLRCalc(unsigned RegIdx) { + return LRCalc[SpillMode != SM_Partition && RegIdx != 0]; + } + + /// defValue - define a value in RegIdx from ParentVNI at Idx. + /// Idx does not have to be ParentVNI->def, but it must be contained within + /// ParentVNI's live range in ParentLI. The new value is added to the value + /// map. + /// Return the new LI value. + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + + /// forceRecompute - Force the live range of ParentVNI in RegIdx to be + /// recomputed by LiveRangeCalc::extend regardless of the number of defs. + /// This is used for values whose live range doesn't match RegAssign exactly. + /// They could have rematerialized, or back-copies may have been moved. + void forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI); + + /// defFromParent - Define Reg from ParentVNI at UseIdx using either + /// rematerialization or a COPY from parent. Return the new value. + VNInfo *defFromParent(unsigned RegIdx, + VNInfo *ParentVNI, + SlotIndex UseIdx, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator I); + + /// removeBackCopies - Remove the copy instructions that defines the values + /// in the vector in the complement interval. + void removeBackCopies(SmallVectorImpl<VNInfo*> &Copies); + + /// getShallowDominator - Returns the least busy dominator of MBB that is + /// also dominated by DefMBB. Busy is measured by loop depth. + MachineBasicBlock *findShallowDominator(MachineBasicBlock *MBB, + MachineBasicBlock *DefMBB); + + /// hoistCopiesForSize - Hoist back-copies to the complement interval in a + /// way that minimizes code size. This implements the SM_Size spill mode. + void hoistCopiesForSize(); + + /// transferValues - Transfer values to the new ranges. + /// Return true if any ranges were skipped. + bool transferValues(); + + /// extendPHIKillRanges - Extend the ranges of all values killed by original + /// parent PHIDefs. + void extendPHIKillRanges(); + + /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. + void rewriteAssigned(bool ExtendRanges); + + /// deleteRematVictims - Delete defs that are dead after rematerializing. + void deleteRematVictims(); + +public: + /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. + /// Newly created intervals will be appended to newIntervals. + SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, + MachineDominatorTree&, MachineBlockFrequencyInfo &); + + /// reset - Prepare for a new split. + void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); + + /// Create a new virtual register and live interval. + /// Return the interval index, starting from 1. Interval index 0 is the + /// implicit complement interval. 
+ unsigned openIntv(); + + /// currentIntv - Return the current interval index. + unsigned currentIntv() const { return OpenIdx; } + + /// selectIntv - Select a previously opened interval index. + void selectIntv(unsigned Idx); + + /// enterIntvBefore - Enter the open interval before the instruction at Idx. + /// If the parent interval is not live before Idx, a COPY is not inserted. + /// Return the beginning of the new live range. + SlotIndex enterIntvBefore(SlotIndex Idx); + + /// enterIntvAfter - Enter the open interval after the instruction at Idx. + /// Return the beginning of the new live range. + SlotIndex enterIntvAfter(SlotIndex Idx); + + /// enterIntvAtEnd - Enter the open interval at the end of MBB. + /// Use the open interval from the inserted copy to the MBB end. + /// Return the beginning of the new live range. + SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB); + + /// useIntv - indicate that all instructions in MBB should use OpenLI. + void useIntv(const MachineBasicBlock &MBB); + + /// useIntv - indicate that all instructions in range should use OpenLI. + void useIntv(SlotIndex Start, SlotIndex End); + + /// leaveIntvAfter - Leave the open interval after the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvAfter(SlotIndex Idx); + + /// leaveIntvBefore - Leave the open interval before the instruction at Idx. + /// Return the end of the live range. + SlotIndex leaveIntvBefore(SlotIndex Idx); + + /// leaveIntvAtTop - Leave the interval at the top of MBB. + /// Add liveness from the MBB top to the copy. + /// Return the end of the live range. + SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB); + + /// overlapIntv - Indicate that all instructions in range should use the open + /// interval, but also let the complement interval be live. + /// + /// This doubles the register pressure, but is sometimes required to deal with + /// register uses after the last valid split point. + /// + /// The Start index should be a return value from a leaveIntv* call, and End + /// should be in the same basic block. The parent interval must have the same + /// value across the range. + /// + void overlapIntv(SlotIndex Start, SlotIndex End); + + /// finish - after all the new live ranges have been created, compute the + /// remaining live range, and rewrite instructions to use the new registers. + /// @param LRMap When not null, this vector will map each live range in Edit + /// back to the indices returned by openIntv. + /// There may be extra indices created by dead code elimination. + void finish(SmallVectorImpl<unsigned> *LRMap = nullptr); + + /// dump - print the current interval mapping to dbgs(). + void dump() const; + + // ===--- High level methods ---=== + + /// splitSingleBlock - Split CurLI into a separate live interval around the + /// uses in a single block. This is intended to be used as part of a larger + /// split, and doesn't call finish(). + void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); + + /// splitLiveThroughBlock - Split CurLI in the given block such that it + /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in + /// the block, but they will be ignored when placing split points. + /// + /// @param MBBNum Block number. + /// @param IntvIn Interval index entering the block. + /// @param LeaveBefore When set, leave IntvIn before this point. + /// @param IntvOut Interval index leaving the block. + /// @param EnterAfter When set, enter IntvOut after this point. 
+  void splitLiveThroughBlock(unsigned MBBNum,
+                             unsigned IntvIn, SlotIndex LeaveBefore,
+                             unsigned IntvOut, SlotIndex EnterAfter);
+
+  /// splitRegInBlock - Split CurLI in the given block such that it enters the
+  /// block in IntvIn and leaves it on the stack (or not at all). Split points
+  /// are placed in a way that avoids putting uses in the stack interval. This
+  /// may require creating a local interval when there is interference.
+  ///
+  /// @param BI          Block descriptor.
+  /// @param IntvIn      Interval index entering the block. Not 0.
+  /// @param LeaveBefore When set, leave IntvIn before this point.
+  void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+                       unsigned IntvIn, SlotIndex LeaveBefore);
+
+  /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+  /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+  /// Split points are placed to avoid interference and such that the uses are
+  /// not in the stack interval. This may require creating a local interval
+  /// when there is interference.
+  ///
+  /// @param BI         Block descriptor.
+  /// @param IntvOut    Interval index leaving the block.
+  /// @param EnterAfter When set, enter IntvOut after this point.
+  void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+                        unsigned IntvOut, SlotIndex EnterAfter);
+};
+
+}
+
+#endif
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
new file mode 100644
index 0000000..3541b33
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -0,0 +1,784 @@
+//===-- StackColoring.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass implements the stack-coloring optimization that looks for
+// lifetime marker machine instructions (LIFETIME_START and LIFETIME_END),
+// which represent the possible lifetime of stack slots. It attempts to
+// merge disjoint stack slots and reduce the used stack space.
+// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
+//
+// TODO: In the future we plan to improve stack coloring in the following ways:
+// 1. Allow merging multiple small slots into a single larger slot at different
+//    offsets.
+// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
+//    spill slots.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "stackcoloring"
+
+static cl::opt<bool>
+DisableColoring("no-stack-coloring",
+                cl::init(false), cl::Hidden,
+                cl::desc("Disable stack coloring"));
+
+/// The user may write code that uses allocas outside of the declared lifetime
+/// zone. This can happen when the user returns a reference to a local
+/// data-structure. We can detect these cases and decide not to optimize the
+/// code. If this flag is enabled, we try to save the user.
+static cl::opt<bool>
+ProtectFromEscapedAllocas("protect-from-escaped-allocas",
+                          cl::init(false), cl::Hidden,
+                          cl::desc("Do not optimize lifetime zones that "
+                                   "are broken"));
+
+STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
+STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
+STATISTIC(StackSlotMerged, "Number of stack slots merged.");
+STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
+
+//===----------------------------------------------------------------------===//
+//                           StackColoring Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+/// StackColoring - A machine pass for merging disjoint stack allocations,
+/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
+class StackColoring : public MachineFunctionPass {
+  MachineFrameInfo *MFI;
+  MachineFunction *MF;
+
+  /// A class representing liveness information for a single basic block.
+  /// Each bit in the BitVector represents the liveness property
+  /// for a different stack slot.
+  struct BlockLifetimeInfo {
+    /// Which slots BEGIN in each basic block.
+    BitVector Begin;
+    /// Which slots END in each basic block.
+    BitVector End;
+    /// Which slots are marked as LIVE_IN, coming into each basic block.
+    BitVector LiveIn;
+    /// Which slots are marked as LIVE_OUT, coming out of each basic block.
+    BitVector LiveOut;
+  };
+
+  /// Maps active slots (per bit) for each basic block.
+  typedef DenseMap<const MachineBasicBlock*, BlockLifetimeInfo> LivenessMap;
+  LivenessMap BlockLiveness;
+
+  /// Maps each basic block to its serial number.
+ DenseMap<const MachineBasicBlock*, int> BasicBlocks; + /// Maps basic blocks to a serial number. + SmallVector<const MachineBasicBlock*, 8> BasicBlockNumbering; + + /// Maps liveness intervals for each slot. + SmallVector<std::unique_ptr<LiveInterval>, 16> Intervals; + /// VNInfo is used for the construction of LiveIntervals. + VNInfo::Allocator VNInfoAllocator; + /// SlotIndex analysis object. + SlotIndexes *Indexes; + /// The stack protector object. + StackProtector *SP; + + /// The list of lifetime markers found. These markers are to be removed + /// once the coloring is done. + SmallVector<MachineInstr*, 8> Markers; + +public: + static char ID; + StackColoring() : MachineFunctionPass(ID) { + initializeStackColoringPass(*PassRegistry::getPassRegistry()); + } + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// Debug. + void dump() const; + + /// Removes all of the lifetime marker instructions from the function. + /// \returns true if any markers were removed. + bool removeAllMarkers(); + + /// Scan the machine function and find all of the lifetime markers. + /// Record the findings in the BEGIN and END vectors. + /// \returns the number of markers found. + unsigned collectMarkers(unsigned NumSlot); + + /// Perform the dataflow calculation and calculate the lifetime for each of + /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and + /// LifetimeLIVE_OUT maps that represent which stack slots are live coming + /// in and out blocks. + void calculateLocalLiveness(); + + /// Construct the LiveIntervals for the slots. + void calculateLiveIntervals(unsigned NumSlots); + + /// Go over the machine function and change instructions which use stack + /// slots to use the joint slots. + void remapInstructions(DenseMap<int, int> &SlotRemap); + + /// The input program may contain instructions which are not inside lifetime + /// markers. This can happen due to a bug in the compiler or due to a bug in + /// user code (for example, returning a reference to a local variable). + /// This procedure checks all of the instructions in the function and + /// invalidates lifetime ranges which do not contain all of the instructions + /// which access that frame slot. + void removeInvalidSlotRanges(); + + /// Map entries which point to other entries to their destination. + /// A->B->C becomes A->C. 
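+ /// For example, assuming a remap of {0 -> 1, 1 -> 3}, slot 0 is redirected
+ /// straight to 3, giving the flattened map {0 -> 3, 1 -> 3}.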
+ void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots); +}; +} // end anonymous namespace + +char StackColoring::ID = 0; +char &llvm::StackColoringID = StackColoring::ID; + +INITIALIZE_PASS_BEGIN(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(StackProtector) +INITIALIZE_PASS_END(StackColoring, + "stack-coloring", "Merge disjoint stack slots", false, false) + +void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineDominatorTree>(); + AU.addPreserved<MachineDominatorTree>(); + AU.addRequired<SlotIndexes>(); + AU.addRequired<StackProtector>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +void StackColoring::dump() const { + for (MachineBasicBlock *MBB : depth_first(MF)) { + DEBUG(dbgs() << "Inspecting block #" << BasicBlocks.lookup(MBB) << " [" + << MBB->getName() << "]\n"); + + LivenessMap::const_iterator BI = BlockLiveness.find(MBB); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; + + DEBUG(dbgs()<<"BEGIN : {"); + for (unsigned i=0; i < BlockInfo.Begin.size(); ++i) + DEBUG(dbgs()<<BlockInfo.Begin.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"END : {"); + for (unsigned i=0; i < BlockInfo.End.size(); ++i) + DEBUG(dbgs()<<BlockInfo.End.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + + DEBUG(dbgs()<<"LIVE_IN: {"); + for (unsigned i=0; i < BlockInfo.LiveIn.size(); ++i) + DEBUG(dbgs()<<BlockInfo.LiveIn.test(i)<<" "); + + DEBUG(dbgs()<<"}\n"); + DEBUG(dbgs()<<"LIVEOUT: {"); + for (unsigned i=0; i < BlockInfo.LiveOut.size(); ++i) + DEBUG(dbgs()<<BlockInfo.LiveOut.test(i)<<" "); + DEBUG(dbgs()<<"}\n"); + } +} + +unsigned StackColoring::collectMarkers(unsigned NumSlot) { + unsigned MarkersFound = 0; + // Scan the function to find all lifetime markers. + // NOTE: We use a reverse-post-order iteration to ensure that we obtain a + // deterministic numbering, and because we'll need a post-order iteration + // later for solving the liveness dataflow problem. + for (MachineBasicBlock *MBB : depth_first(MF)) { + + // Assign a serial number to this basic block. + BasicBlocks[MBB] = BasicBlockNumbering.size(); + BasicBlockNumbering.push_back(MBB); + + // Keep a reference to avoid repeated lookups. + BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB]; + + BlockInfo.Begin.resize(NumSlot); + BlockInfo.End.resize(NumSlot); + + for (MachineInstr &MI : *MBB) { + if (MI.getOpcode() != TargetOpcode::LIFETIME_START && + MI.getOpcode() != TargetOpcode::LIFETIME_END) + continue; + + Markers.push_back(&MI); + + bool IsStart = MI.getOpcode() == TargetOpcode::LIFETIME_START; + const MachineOperand &MO = MI.getOperand(0); + unsigned Slot = MO.getIndex(); + + MarkersFound++; + + const AllocaInst *Allocation = MFI->getObjectAllocation(Slot); + if (Allocation) { + DEBUG(dbgs()<<"Found a lifetime marker for slot #"<<Slot<< + " with allocation: "<< Allocation->getName()<<"\n"); + } + + if (IsStart) { + BlockInfo.Begin.set(Slot); + } else { + if (BlockInfo.Begin.test(Slot)) { + // Allocas that start and end within a single block are handled + // specially when computing the LiveIntervals to avoid pessimizing + // the liveness propagation. + BlockInfo.Begin.reset(Slot); + } else { + BlockInfo.End.set(Slot); + } + } + } + } + + // Update statistics. 
+ NumMarkerSeen += MarkersFound; + return MarkersFound; +} + +void StackColoring::calculateLocalLiveness() { + // Perform a standard reverse dataflow computation to solve for + // global liveness. The BEGIN set here is equivalent to KILL in the standard + // formulation, and END is equivalent to GEN. The result of this computation + // is a map from blocks to bitvectors where the bitvectors represent which + // allocas are live in/out of that block. + SmallPtrSet<const MachineBasicBlock*, 8> BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); + unsigned NumSSMIters = 0; + bool changed = true; + while (changed) { + changed = false; + ++NumSSMIters; + + SmallPtrSet<const MachineBasicBlock*, 8> NextBBSet; + + for (const MachineBasicBlock *BB : BasicBlockNumbering) { + if (!BBSet.count(BB)) continue; + + // Use an iterator to avoid repeated lookups. + LivenessMap::iterator BI = BlockLiveness.find(BB); + assert(BI != BlockLiveness.end() && "Block not found"); + BlockLifetimeInfo &BlockInfo = BI->second; + + BitVector LocalLiveIn; + BitVector LocalLiveOut; + + // Forward propagation from begins to ends. + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) { + LivenessMap::const_iterator I = BlockLiveness.find(*PI); + assert(I != BlockLiveness.end() && "Predecessor not found"); + LocalLiveIn |= I->second.LiveOut; + } + LocalLiveIn |= BlockInfo.End; + LocalLiveIn.reset(BlockInfo.Begin); + + // Reverse propagation from ends to begins. + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + LivenessMap::const_iterator I = BlockLiveness.find(*SI); + assert(I != BlockLiveness.end() && "Successor not found"); + LocalLiveOut |= I->second.LiveIn; + } + LocalLiveOut |= BlockInfo.Begin; + LocalLiveOut.reset(BlockInfo.End); + + LocalLiveIn |= LocalLiveOut; + LocalLiveOut |= LocalLiveIn; + + // After adopting the live bits, we need to turn-off the bits which + // are de-activated in this block. + LocalLiveOut.reset(BlockInfo.End); + LocalLiveIn.reset(BlockInfo.Begin); + + // If we have both BEGIN and END markers in the same basic block then + // we know that the BEGIN marker comes after the END, because we already + // handle the case where the BEGIN comes before the END when collecting + // the markers (and building the BEGIN/END vectore). + // Want to enable the LIVE_IN and LIVE_OUT of slots that have both + // BEGIN and END because it means that the value lives before and after + // this basic block. + BitVector LocalEndBegin = BlockInfo.End; + LocalEndBegin &= BlockInfo.Begin; + LocalLiveIn |= LocalEndBegin; + LocalLiveOut |= LocalEndBegin; + + if (LocalLiveIn.test(BlockInfo.LiveIn)) { + changed = true; + BlockInfo.LiveIn |= LocalLiveIn; + + NextBBSet.insert(BB->pred_begin(), BB->pred_end()); + } + + if (LocalLiveOut.test(BlockInfo.LiveOut)) { + changed = true; + BlockInfo.LiveOut |= LocalLiveOut; + + NextBBSet.insert(BB->succ_begin(), BB->succ_end()); + } + } + + BBSet = std::move(NextBBSet); + }// while changed. +} + +void StackColoring::calculateLiveIntervals(unsigned NumSlots) { + SmallVector<SlotIndex, 16> Starts; + SmallVector<SlotIndex, 16> Finishes; + + // For each block, find which slots are active within this block + // and update the live intervals. + for (const MachineBasicBlock &MBB : *MF) { + Starts.clear(); + Starts.resize(NumSlots); + Finishes.clear(); + Finishes.resize(NumSlots); + + // Create the interval for the basic blocks with lifetime markers in them. 
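+ // Starts[Slot] records the earliest LIFETIME_START index seen in this
+ // block and Finishes[Slot] the latest LIFETIME_END index; slots that are
+ // live-in or live-out of the block pick up the block start/end indices
+ // further below.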
+ for (const MachineInstr *MI : Markers) { + if (MI->getParent() != &MBB) + continue; + + assert((MI->getOpcode() == TargetOpcode::LIFETIME_START || + MI->getOpcode() == TargetOpcode::LIFETIME_END) && + "Invalid Lifetime marker"); + + bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; + const MachineOperand &Mo = MI->getOperand(0); + int Slot = Mo.getIndex(); + assert(Slot >= 0 && "Invalid slot"); + + SlotIndex ThisIndex = Indexes->getInstructionIndex(MI); + + if (IsStart) { + if (!Starts[Slot].isValid() || Starts[Slot] > ThisIndex) + Starts[Slot] = ThisIndex; + } else { + if (!Finishes[Slot].isValid() || Finishes[Slot] < ThisIndex) + Finishes[Slot] = ThisIndex; + } + } + + // Create the interval of the blocks that we previously found to be 'alive'. + BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(&MBB); + } + for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; + pos = MBBLiveness.LiveOut.find_next(pos)) { + Finishes[pos] = Indexes->getMBBEndIdx(&MBB); + } + + for (unsigned i = 0; i < NumSlots; ++i) { + assert(Starts[i].isValid() == Finishes[i].isValid() && "Unmatched range"); + if (!Starts[i].isValid()) + continue; + + assert(Starts[i] && Finishes[i] && "Invalid interval"); + VNInfo *ValNum = Intervals[i]->getValNumInfo(0); + SlotIndex S = Starts[i]; + SlotIndex F = Finishes[i]; + if (S < F) { + // We have a single consecutive region. + Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); + } else { + // We have two non-consecutive regions. This happens when + // LIFETIME_START appears after the LIFETIME_END marker. + SlotIndex NewStart = Indexes->getMBBStartIdx(&MBB); + SlotIndex NewFin = Indexes->getMBBEndIdx(&MBB); + Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); + } + } + } +} + +bool StackColoring::removeAllMarkers() { + unsigned Count = 0; + for (MachineInstr *MI : Markers) { + MI->eraseFromParent(); + Count++; + } + Markers.clear(); + + DEBUG(dbgs()<<"Removed "<<Count<<" markers.\n"); + return Count; +} + +void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) { + unsigned FixedInstr = 0; + unsigned FixedMemOp = 0; + unsigned FixedDbg = 0; + MachineModuleInfo *MMI = &MF->getMMI(); + + // Remap debug information that refers to stack slots. + for (auto &VI : MMI->getVariableDbgInfo()) { + if (!VI.Var) + continue; + if (SlotRemap.count(VI.Slot)) { + DEBUG(dbgs() << "Remapping debug info for [" + << cast<DILocalVariable>(VI.Var)->getName() << "].\n"); + VI.Slot = SlotRemap[VI.Slot]; + FixedDbg++; + } + } + + // Keep a list of *allocas* which need to be remapped. + DenseMap<const AllocaInst*, const AllocaInst*> Allocas; + for (const std::pair<int, int> &SI : SlotRemap) { + const AllocaInst *From = MFI->getObjectAllocation(SI.first); + const AllocaInst *To = MFI->getObjectAllocation(SI.second); + assert(To && From && "Invalid allocation object"); + Allocas[From] = To; + + // AA might be used later for instruction scheduling, and we need it to be + // able to deduce the correct aliasing releationships between pointers + // derived from the alloca being remapped and the target of that remapping. + // The only safe way, without directly informing AA about the remapping + // somehow, is to directly update the IR to reflect the change being made + // here. 
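+ // Roughly, for a remap of %from onto %to this rewrites (illustrative IR):
+ //   %p = getelementptr ... %from, ...
+ // into
+ //   %from.cast = bitcast <to-type> %to to <from-type> ; only if types differ
+ //   %p = getelementptr ... %from.cast, ...
+ // before the frame indices in the machine instructions are rewritten below.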
+ Instruction *Inst = const_cast<AllocaInst *>(To); + if (From->getType() != To->getType()) { + BitCastInst *Cast = new BitCastInst(Inst, From->getType()); + Cast->insertAfter(Inst); + Inst = Cast; + } + + // Allow the stack protector to adjust its value map to account for the + // upcoming replacement. + SP->adjustForColoring(From, To); + + // Note that this will not replace uses in MMOs (which we'll update below), + // or anywhere else (which is why we won't delete the original + // instruction). + const_cast<AllocaInst *>(From)->replaceAllUsesWith(Inst); + } + + // Remap all instructions to the new stack slots. + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { + // Skip lifetime markers. We'll remove them soon. + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END) + continue; + + // Update the MachineMemOperand to use the new alloca. + for (MachineMemOperand *MMO : I.memoperands()) { + // FIXME: In order to enable the use of TBAA when using AA in CodeGen, + // we'll also need to update the TBAA nodes in MMOs with values + // derived from the merged allocas. When doing this, we'll need to use + // the same variant of GetUnderlyingObjects that is used by the + // instruction scheduler (that can look through ptrtoint/inttoptr + // pairs). + + // We've replaced IR-level uses of the remapped allocas, so we only + // need to replace direct uses here. + const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue()); + if (!AI) + continue; + + if (!Allocas.count(AI)) + continue; + + MMO->setValue(Allocas[AI]); + FixedMemOp++; + } + + // Update all of the machine instruction operands. + for (MachineOperand &MO : I.operands()) { + if (!MO.isFI()) + continue; + int FromSlot = MO.getIndex(); + + // Don't touch arguments. + if (FromSlot<0) + continue; + + // Only look at mapped slots. + if (!SlotRemap.count(FromSlot)) + continue; + + // In a debug build, check that the instruction that we are modifying is + // inside the expected live range. If the instruction is not inside + // the calculated range then it means that the alloca usage moved + // outside of the lifetime markers, or that the user has a bug. + // NOTE: Alloca address calculations which happen outside the lifetime + // zone are are okay, despite the fact that we don't have a good way + // for validating all of the usages of the calculation. +#ifndef NDEBUG + bool TouchesMemory = I.mayLoad() || I.mayStore(); + // If we *don't* protect the user from escaped allocas, don't bother + // validating the instructions. + if (!I.isDebugValue() && TouchesMemory && ProtectFromEscapedAllocas) { + SlotIndex Index = Indexes->getInstructionIndex(&I); + const LiveInterval *Interval = &*Intervals[FromSlot]; + assert(Interval->find(Index) != Interval->end() && + "Found instruction usage outside of live range."); + } +#endif + + // Fix the machine instructions. + int ToSlot = SlotRemap[FromSlot]; + MO.setIndex(ToSlot); + FixedInstr++; + } + } + + DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n"); + DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n"); +} + +void StackColoring::removeInvalidSlotRanges() { + for (MachineBasicBlock &BB : *MF) + for (MachineInstr &I : BB) { + if (I.getOpcode() == TargetOpcode::LIFETIME_START || + I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugValue()) + continue; + + // Some intervals are suspicious! 
In some cases we find address + // calculations outside of the lifetime zone, but not actual memory + // read or write. Memory accesses outside of the lifetime zone are a clear + // violation, but address calculations are okay. This can happen when + // GEPs are hoisted outside of the lifetime zone. + // So, in here we only check instructions which can read or write memory. + if (!I.mayLoad() && !I.mayStore()) + continue; + + // Check all of the machine operands. + for (const MachineOperand &MO : I.operands()) { + if (!MO.isFI()) + continue; + + int Slot = MO.getIndex(); + + if (Slot<0) + continue; + + if (Intervals[Slot]->empty()) + continue; + + // Check that the used slot is inside the calculated lifetime range. + // If it is not, warn about it and invalidate the range. + LiveInterval *Interval = &*Intervals[Slot]; + SlotIndex Index = Indexes->getInstructionIndex(&I); + if (Interval->find(Index) == Interval->end()) { + Interval->clear(); + DEBUG(dbgs()<<"Invalidating range #"<<Slot<<"\n"); + EscapedAllocas++; + } + } + } +} + +void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap, + unsigned NumSlots) { + // Expunge slot remap map. + for (unsigned i=0; i < NumSlots; ++i) { + // If we are remapping i + if (SlotRemap.count(i)) { + int Target = SlotRemap[i]; + // As long as our target is mapped to something else, follow it. + while (SlotRemap.count(Target)) { + Target = SlotRemap[Target]; + SlotRemap[i] = Target; + } + } + } +} + +bool StackColoring::runOnMachineFunction(MachineFunction &Func) { + if (skipOptnoneFunction(*Func.getFunction())) + return false; + + DEBUG(dbgs() << "********** Stack Coloring **********\n" + << "********** Function: " + << ((const Value*)Func.getFunction())->getName() << '\n'); + MF = &Func; + MFI = MF->getFrameInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + SP = &getAnalysis<StackProtector>(); + BlockLiveness.clear(); + BasicBlocks.clear(); + BasicBlockNumbering.clear(); + Markers.clear(); + Intervals.clear(); + VNInfoAllocator.Reset(); + + unsigned NumSlots = MFI->getObjectIndexEnd(); + + // If there are no stack slots then there are no markers to remove. + if (!NumSlots) + return false; + + SmallVector<int, 8> SortedSlots; + + SortedSlots.reserve(NumSlots); + Intervals.reserve(NumSlots); + + unsigned NumMarkers = collectMarkers(NumSlots); + + unsigned TotalSize = 0; + DEBUG(dbgs()<<"Found "<<NumMarkers<<" markers and "<<NumSlots<<" slots\n"); + DEBUG(dbgs()<<"Slot structure:\n"); + + for (int i=0; i < MFI->getObjectIndexEnd(); ++i) { + DEBUG(dbgs()<<"Slot #"<<i<<" - "<<MFI->getObjectSize(i)<<" bytes.\n"); + TotalSize += MFI->getObjectSize(i); + } + + DEBUG(dbgs()<<"Total Stack size: "<<TotalSize<<" bytes\n\n"); + + // Don't continue because there are not enough lifetime markers, or the + // stack is too small, or we are told not to optimize the slots. + if (NumMarkers < 2 || TotalSize < 16 || DisableColoring) { + DEBUG(dbgs()<<"Will not try to merge slots.\n"); + return removeAllMarkers(); + } + + for (unsigned i=0; i < NumSlots; ++i) { + std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0)); + LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator); + Intervals.push_back(std::move(LI)); + SortedSlots.push_back(i); + } + + // Calculate the liveness of each block. + calculateLocalLiveness(); + + // Propagate the liveness information. + calculateLiveIntervals(NumSlots); + + // Search for allocas which are used outside of the declared lifetime + // markers. 
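+ // A typical broken lifetime zone comes from returning a reference to a
+ // local, roughly:
+ //
+ //   char *f() { char buf[16]; return buf; }
+ //
+ // When this protection is enabled, slots whose memory accesses fall outside
+ // the computed live range get their ranges invalidated instead of merged.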
+ if (ProtectFromEscapedAllocas) + removeInvalidSlotRanges(); + + // Maps old slots to new slots. + DenseMap<int, int> SlotRemap; + unsigned RemovedSlots = 0; + unsigned ReducedSize = 0; + + // Do not bother looking at empty intervals. + for (unsigned I = 0; I < NumSlots; ++I) { + if (Intervals[SortedSlots[I]]->empty()) + SortedSlots[I] = -1; + } + + // This is a simple greedy algorithm for merging allocas. First, sort the + // slots, placing the largest slots first. Next, perform an n^2 scan and look + // for disjoint slots. When you find disjoint slots, merge the samller one + // into the bigger one and update the live interval. Remove the small alloca + // and continue. + + // Sort the slots according to their size. Place unused slots at the end. + // Use stable sort to guarantee deterministic code generation. + std::stable_sort(SortedSlots.begin(), SortedSlots.end(), + [this](int LHS, int RHS) { + // We use -1 to denote a uninteresting slot. Place these slots at the end. + if (LHS == -1) return false; + if (RHS == -1) return true; + // Sort according to size. + return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS); + }); + + bool Changed = true; + while (Changed) { + Changed = false; + for (unsigned I = 0; I < NumSlots; ++I) { + if (SortedSlots[I] == -1) + continue; + + for (unsigned J=I+1; J < NumSlots; ++J) { + if (SortedSlots[J] == -1) + continue; + + int FirstSlot = SortedSlots[I]; + int SecondSlot = SortedSlots[J]; + LiveInterval *First = &*Intervals[FirstSlot]; + LiveInterval *Second = &*Intervals[SecondSlot]; + assert (!First->empty() && !Second->empty() && "Found an empty range"); + + // Merge disjoint slots. + if (!First->overlaps(*Second)) { + Changed = true; + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); + SlotRemap[SecondSlot] = FirstSlot; + SortedSlots[J] = -1; + DEBUG(dbgs()<<"Merging #"<<FirstSlot<<" and slots #"<< + SecondSlot<<" together.\n"); + unsigned MaxAlignment = std::max(MFI->getObjectAlignment(FirstSlot), + MFI->getObjectAlignment(SecondSlot)); + + assert(MFI->getObjectSize(FirstSlot) >= + MFI->getObjectSize(SecondSlot) && + "Merging a small object into a larger one"); + + RemovedSlots+=1; + ReducedSize += MFI->getObjectSize(SecondSlot); + MFI->setObjectAlignment(FirstSlot, MaxAlignment); + MFI->RemoveStackObject(SecondSlot); + } + } + } + }// While changed. + + // Record statistics. + StackSpaceSaved += ReducedSize; + StackSlotMerged += RemovedSlots; + DEBUG(dbgs()<<"Merge "<<RemovedSlots<<" slots. Saved "<< + ReducedSize<<" bytes\n"); + + // Scan the entire function and update all machine operands that use frame + // indices to use the remapped frame index. + expungeSlotMap(SlotRemap, NumSlots); + remapInstructions(SlotRemap); + + return removeAllMarkers(); +} diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp new file mode 100644 index 0000000..8550583 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -0,0 +1,166 @@ +//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the StackMap Liveness analysis pass. 
The pass calculates +// the liveness for each basic block in a function and attaches the register +// live-out information to a stackmap or patchpoint intrinsic if present. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LivePhysRegs.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "stackmaps" + +static cl::opt<bool> EnablePatchPointLiveness( + "enable-patchpoint-liveness", cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); + +STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); +STATISTIC(NumStackMapFuncSkipped, "Number of functions skipped"); +STATISTIC(NumBBsVisited, "Number of basic blocks visited"); +STATISTIC(NumBBsHaveNoStackmap, "Number of basic blocks with no stackmap"); +STATISTIC(NumStackMaps, "Number of StackMaps visited"); + +namespace { +/// \brief This pass calculates the liveness information for each basic block in +/// a function and attaches the register live-out information to a patchpoint +/// intrinsic if present. +/// +/// This pass can be disabled via the -enable-patchpoint-liveness=false flag. +/// The pass skips functions that don't have any patchpoint intrinsics. The +/// information provided by this pass is optional and not required by the +/// aformentioned intrinsic to function. +class StackMapLiveness : public MachineFunctionPass { + const TargetRegisterInfo *TRI; + LivePhysRegs LiveRegs; + +public: + static char ID; + + /// \brief Default construct and initialize the pass. + StackMapLiveness(); + + /// \brief Tell the pass manager which passes we depend on and what + /// information we preserve. + void getAnalysisUsage(AnalysisUsage &AU) const override; + + /// \brief Calculate the liveness information for the given machine function. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Performs the actual liveness calculation for the function. + bool calculateLiveness(MachineFunction &MF); + + /// \brief Add the current register live set to the instruction. + void addLiveOutSetToMI(MachineFunction &MF, MachineInstr &MI); + + /// \brief Create a register mask and initialize it with the registers from + /// the register live set. + uint32_t *createRegisterMask(MachineFunction &MF) const; +}; +} // namespace + +char StackMapLiveness::ID = 0; +char &llvm::StackMapLivenessID = StackMapLiveness::ID; +INITIALIZE_PASS(StackMapLiveness, "stackmap-liveness", + "StackMap Liveness Analysis", false, false) + +/// Default construct and initialize the pass. +StackMapLiveness::StackMapLiveness() : MachineFunctionPass(ID) { + initializeStackMapLivenessPass(*PassRegistry::getPassRegistry()); +} + +/// Tell the pass manager which passes we depend on and what information we +/// preserve. +void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { + // We preserve all information. + AU.setPreservesAll(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +/// Calculate the liveness information for the given machine function. 
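+/// Functions without any patchpoints are skipped cheaply; otherwise each
+/// basic block is walked bottom-up with LivePhysRegs and the current live
+/// set is attached as a live-out register-mask operand to every PATCHPOINT.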
+bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) { + if (!EnablePatchPointLiveness) + return false; + + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << MF.getName() + << " **********\n"); + TRI = MF.getSubtarget().getRegisterInfo(); + ++NumStackMapFuncVisited; + + // Skip this function if there are no patchpoints to process. + if (!MF.getFrameInfo()->hasPatchPoint()) { + ++NumStackMapFuncSkipped; + return false; + } + return calculateLiveness(MF); +} + +/// Performs the actual liveness calculation for the function. +bool StackMapLiveness::calculateLiveness(MachineFunction &MF) { + bool HasChanged = false; + // For all basic blocks in the function. + for (auto &MBB : MF) { + DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n"); + LiveRegs.init(TRI); + LiveRegs.addLiveOuts(&MBB); + bool HasStackMap = false; + // Reverse iterate over all instructions and add the current live register + // set to an instruction if we encounter a patchpoint instruction. + for (auto I = MBB.rbegin(), E = MBB.rend(); I != E; ++I) { + if (I->getOpcode() == TargetOpcode::PATCHPOINT) { + addLiveOutSetToMI(MF, *I); + HasChanged = true; + HasStackMap = true; + ++NumStackMaps; + } + DEBUG(dbgs() << " " << LiveRegs << " " << *I); + LiveRegs.stepBackward(*I); + } + ++NumBBsVisited; + if (!HasStackMap) + ++NumBBsHaveNoStackmap; + } + return HasChanged; +} + +/// Add the current register live set to the instruction. +void StackMapLiveness::addLiveOutSetToMI(MachineFunction &MF, + MachineInstr &MI) { + uint32_t *Mask = createRegisterMask(MF); + MachineOperand MO = MachineOperand::CreateRegLiveOut(Mask); + MI.addOperand(MF, MO); +} + +/// Create a register mask and initialize it with the registers from the +/// register live set. +uint32_t *StackMapLiveness::createRegisterMask(MachineFunction &MF) const { + // The mask is owned and cleaned up by the Machine Function. + uint32_t *Mask = MF.allocateRegisterMask(TRI->getNumRegs()); + for (auto Reg : LiveRegs) + Mask[Reg / 32] |= 1U << (Reg % 32); + + // Give the target a chance to adjust the mask. + TRI->adjustStackMapLiveOutMask(Mask); + + return Mask; +} diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp new file mode 100644 index 0000000..b3cd8b3 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp @@ -0,0 +1,552 @@ +//===---------------------------- StackMaps.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOpcodes.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <iterator> + +using namespace llvm; + +#define DEBUG_TYPE "stackmaps" + +static cl::opt<int> StackMapVersion( + "stackmap-version", cl::init(1), + cl::desc("Specify the stackmap encoding version (default = 1)")); + +const char *StackMaps::WSMP = "Stack Maps: "; + +PatchPointOpers::PatchPointOpers(const MachineInstr *MI) + : MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == + CallingConv::AnyReg) { +#ifndef NDEBUG + unsigned CheckStartIdx = 0, e = MI->getNumOperands(); + while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && + MI->getOperand(CheckStartIdx).isDef() && + !MI->getOperand(CheckStartIdx).isImplicit()) + ++CheckStartIdx; + + assert(getMetaIdx() == CheckStartIdx && + "Unexpected additional definition in Patchpoint intrinsic."); +#endif +} + +unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { + if (!StartIdx) + StartIdx = getVarIdx(); + + // Find the next scratch register (implicit def and early clobber) + unsigned ScratchIdx = StartIdx, e = MI->getNumOperands(); + while (ScratchIdx < e && + !(MI->getOperand(ScratchIdx).isReg() && + MI->getOperand(ScratchIdx).isDef() && + MI->getOperand(ScratchIdx).isImplicit() && + MI->getOperand(ScratchIdx).isEarlyClobber())) + ++ScratchIdx; + + assert(ScratchIdx != e && "No scratch register available"); + return ScratchIdx; +} + +StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) { + if (StackMapVersion != 1) + llvm_unreachable("Unsupported stackmap version!"); +} + +/// Go up the super-register chain until we hit a valid dwarf register number. 
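+/// Some registers have no DWARF number of their own; for those, the number
+/// of the first super-register that does have one is used instead.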
+static unsigned getDwarfRegNum(unsigned Reg, const TargetRegisterInfo *TRI) { + int RegNum = TRI->getDwarfRegNum(Reg, false); + for (MCSuperRegIterator SR(Reg, TRI); SR.isValid() && RegNum < 0; ++SR) + RegNum = TRI->getDwarfRegNum(*SR, false); + + assert(RegNum >= 0 && "Invalid Dwarf register number."); + return (unsigned)RegNum; +} + +MachineInstr::const_mop_iterator +StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, LocationVec &Locs, + LiveOutVec &LiveOuts) const { + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); + if (MOI->isImm()) { + switch (MOI->getImm()) { + default: + llvm_unreachable("Unrecognized operand type."); + case StackMaps::DirectMemRefOp: { + auto &DL = AP.MF->getDataLayout(); + + unsigned Size = DL.getPointerSizeInBits(); + assert((Size % 8) == 0 && "Need pointer size in bytes."); + Size /= 8; + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.emplace_back(StackMaps::Location::Direct, Size, + getDwarfRegNum(Reg, TRI), Imm); + break; + } + case StackMaps::IndirectMemRefOp: { + int64_t Size = (++MOI)->getImm(); + assert(Size > 0 && "Need a valid size for indirect memory locations."); + unsigned Reg = (++MOI)->getReg(); + int64_t Imm = (++MOI)->getImm(); + Locs.emplace_back(StackMaps::Location::Indirect, Size, + getDwarfRegNum(Reg, TRI), Imm); + break; + } + case StackMaps::ConstantOp: { + ++MOI; + assert(MOI->isImm() && "Expected constant operand."); + int64_t Imm = MOI->getImm(); + Locs.emplace_back(Location::Constant, sizeof(int64_t), 0, Imm); + break; + } + } + return ++MOI; + } + + // The physical register number will ultimately be encoded as a DWARF regno. + // The stack map also records the size of a spill slot that can hold the + // register content. (The runtime can track the actual size of the data type + // if it needs to.) + if (MOI->isReg()) { + // Skip implicit registers (this includes our scratch registers) + if (MOI->isImplicit()) + return ++MOI; + + assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) && + "Virtreg operands should have been rewritten before now."); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg()); + assert(!MOI->getSubReg() && "Physical subreg still around."); + + unsigned Offset = 0; + unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI); + unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false); + unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg()); + if (SubRegIdx) + Offset = TRI->getSubRegIdxOffset(SubRegIdx); + + Locs.emplace_back(Location::Register, RC->getSize(), DwarfRegNum, Offset); + return ++MOI; + } + + if (MOI->isRegLiveOut()) + LiveOuts = parseRegisterLiveOutMask(MOI->getRegLiveOut()); + + return ++MOI; +} + +void StackMaps::print(raw_ostream &OS) { + const TargetRegisterInfo *TRI = + AP.MF ? 
AP.MF->getSubtarget().getRegisterInfo() : nullptr; + OS << WSMP << "callsites:\n"; + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; + + OS << WSMP << "callsite " << CSI.ID << "\n"; + OS << WSMP << " has " << CSLocs.size() << " locations\n"; + + unsigned Idx = 0; + for (const auto &Loc : CSLocs) { + OS << WSMP << "\t\tLoc " << Idx << ": "; + switch (Loc.Type) { + case Location::Unprocessed: + OS << "<Unprocessed operand>"; + break; + case Location::Register: + OS << "Register "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + break; + case Location::Direct: + OS << "Direct "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + if (Loc.Offset) + OS << " + " << Loc.Offset; + break; + case Location::Indirect: + OS << "Indirect "; + if (TRI) + OS << TRI->getName(Loc.Reg); + else + OS << Loc.Reg; + OS << "+" << Loc.Offset; + break; + case Location::Constant: + OS << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + OS << "Constant Index " << Loc.Offset; + break; + } + OS << "\t[encoding: .byte " << Loc.Type << ", .byte " << Loc.Size + << ", .short " << Loc.Reg << ", .int " << Loc.Offset << "]\n"; + Idx++; + } + + OS << WSMP << "\thas " << LiveOuts.size() << " live-out registers\n"; + + Idx = 0; + for (const auto &LO : LiveOuts) { + OS << WSMP << "\t\tLO " << Idx << ": "; + if (TRI) + OS << TRI->getName(LO.Reg); + else + OS << LO.Reg; + OS << "\t[encoding: .short " << LO.DwarfRegNum << ", .byte 0, .byte " + << LO.Size << "]\n"; + Idx++; + } + } +} + +/// Create a live-out register record for the given register Reg. +StackMaps::LiveOutReg +StackMaps::createLiveOutReg(unsigned Reg, const TargetRegisterInfo *TRI) const { + unsigned DwarfRegNum = getDwarfRegNum(Reg, TRI); + unsigned Size = TRI->getMinimalPhysRegClass(Reg)->getSize(); + return LiveOutReg(Reg, DwarfRegNum, Size); +} + +/// Parse the register live-out mask and return a vector of live-out registers +/// that need to be recorded in the stackmap. +StackMaps::LiveOutVec +StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const { + assert(Mask && "No register mask specified"); + const TargetRegisterInfo *TRI = AP.MF->getSubtarget().getRegisterInfo(); + LiveOutVec LiveOuts; + + // Create a LiveOutReg for each bit that is set in the register mask. + for (unsigned Reg = 0, NumRegs = TRI->getNumRegs(); Reg != NumRegs; ++Reg) + if ((Mask[Reg / 32] >> Reg % 32) & 1) + LiveOuts.push_back(createLiveOutReg(Reg, TRI)); + + // We don't need to keep track of a register if its super-register is already + // in the list. Merge entries that refer to the same dwarf register and use + // the maximum size that needs to be spilled. + + std::sort(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LHS, const LiveOutReg &RHS) { + // Only sort by the dwarf register number. + return LHS.DwarfRegNum < RHS.DwarfRegNum; + }); + + for (auto I = LiveOuts.begin(), E = LiveOuts.end(); I != E; ++I) { + for (auto II = std::next(I); II != E; ++II) { + if (I->DwarfRegNum != II->DwarfRegNum) { + // Skip all the now invalid entries. + I = --II; + break; + } + I->Size = std::max(I->Size, II->Size); + if (TRI->isSuperRegister(I->Reg, II->Reg)) + I->Reg = II->Reg; + II->Reg = 0; // mark for deletion. 
+ } + } + + LiveOuts.erase( + std::remove_if(LiveOuts.begin(), LiveOuts.end(), + [](const LiveOutReg &LO) { return LO.Reg == 0; }), + LiveOuts.end()); + + return LiveOuts; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID, + MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + bool recordResult) { + + MCContext &OutContext = AP.OutStreamer->getContext(); + MCSymbol *MILabel = OutContext.createTempSymbol(); + AP.OutStreamer->EmitLabel(MILabel); + + LocationVec Locations; + LiveOutVec LiveOuts; + + if (recordResult) { + assert(PatchPointOpers(&MI).hasDef() && "Stackmap has no return value."); + parseOperand(MI.operands_begin(), std::next(MI.operands_begin()), Locations, + LiveOuts); + } + + // Parse operands. + while (MOI != MOE) { + MOI = parseOperand(MOI, MOE, Locations, LiveOuts); + } + + // Move large constants into the constant pool. + for (auto &Loc : Locations) { + // Constants are encoded as sign-extended integers. + // -1 is directly encoded as .long 0xFFFFFFFF with no constant pool. + if (Loc.Type == Location::Constant && !isInt<32>(Loc.Offset)) { + Loc.Type = Location::ConstantIndex; + // ConstPool is intentionally a MapVector of 'uint64_t's (as + // opposed to 'int64_t's). We should never be in a situation + // where we have to insert either the tombstone or the empty + // keys into a map, and for a DenseMap<uint64_t, T> these are + // (uint64_t)0 and (uint64_t)-1. They can be and are + // represented using 32 bit integers. + assert((uint64_t)Loc.Offset != DenseMapInfo<uint64_t>::getEmptyKey() && + (uint64_t)Loc.Offset != + DenseMapInfo<uint64_t>::getTombstoneKey() && + "empty and tombstone keys should fit in 32 bits!"); + auto Result = ConstPool.insert(std::make_pair(Loc.Offset, Loc.Offset)); + Loc.Offset = Result.first - ConstPool.begin(); + } + } + + // Create an expression to calculate the offset of the callsite from function + // entry. + const MCExpr *CSOffsetExpr = MCBinaryExpr::createSub( + MCSymbolRefExpr::create(MILabel, OutContext), + MCSymbolRefExpr::create(AP.CurrentFnSymForSize, OutContext), OutContext); + + CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations), + std::move(LiveOuts)); + + // Record the stack size of the current function. + const MachineFrameInfo *MFI = AP.MF->getFrameInfo(); + const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo(); + bool HasDynamicFrameSize = + MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF)); + FnStackSize[AP.CurrentFnSym] = + HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize(); +} + +void StackMaps::recordStackMap(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap"); + + int64_t ID = MI.getOperand(0).getImm(); + recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2), + MI.operands_end()); +} + +void StackMaps::recordPatchPoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint"); + + PatchPointOpers opers(&MI); + int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); + + auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx()); + recordStackMapOpers(MI, ID, MOI, MI.operands_end(), + opers.isAnyReg() && opers.hasDef()); + +#ifndef NDEBUG + // verify anyregcc + auto &Locations = CSInfos.back().Locations; + if (opers.isAnyReg()) { + unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); + for (unsigned i = 0, e = (opers.hasDef() ? 
NArgs + 1 : NArgs); i != e; ++i) + assert(Locations[i].Type == Location::Register && + "anyreg arg must be in reg."); + } +#endif +} +void StackMaps::recordStatepoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STATEPOINT && "expected statepoint"); + + StatepointOpers opers(&MI); + // Record all the deopt and gc operands (they're contiguous and run from the + // initial index to the end of the operand list) + const unsigned StartIdx = opers.getVarIdx(); + recordStackMapOpers(MI, opers.getID(), MI.operands_begin() + StartIdx, + MI.operands_end(), false); +} + +/// Emit the stackmap header. +/// +/// Header { +/// uint8 : Stack Map Version (currently 1) +/// uint8 : Reserved (expected to be 0) +/// uint16 : Reserved (expected to be 0) +/// } +/// uint32 : NumFunctions +/// uint32 : NumConstants +/// uint32 : NumRecords +void StackMaps::emitStackmapHeader(MCStreamer &OS) { + // Header. + OS.EmitIntValue(StackMapVersion, 1); // Version. + OS.EmitIntValue(0, 1); // Reserved. + OS.EmitIntValue(0, 2); // Reserved. + + // Num functions. + DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n'); + OS.EmitIntValue(FnStackSize.size(), 4); + // Num constants. + DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n'); + OS.EmitIntValue(ConstPool.size(), 4); + // Num callsites. + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << '\n'); + OS.EmitIntValue(CSInfos.size(), 4); +} + +/// Emit the function frame record for each function. +/// +/// StkSizeRecord[NumFunctions] { +/// uint64 : Function Address +/// uint64 : Stack Size +/// } +void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) { + // Function Frame records. + DEBUG(dbgs() << WSMP << "functions:\n"); + for (auto const &FR : FnStackSize) { + DEBUG(dbgs() << WSMP << "function addr: " << FR.first + << " frame size: " << FR.second); + OS.EmitSymbolValue(FR.first, 8); + OS.EmitIntValue(FR.second, 8); + } +} + +/// Emit the constant pool. +/// +/// int64 : Constants[NumConstants] +void StackMaps::emitConstantPoolEntries(MCStreamer &OS) { + // Constant pool entries. + DEBUG(dbgs() << WSMP << "constants:\n"); + for (const auto &ConstEntry : ConstPool) { + DEBUG(dbgs() << WSMP << ConstEntry.second << '\n'); + OS.EmitIntValue(ConstEntry.second, 8); + } +} + +/// Emit the callsite info for each callsite. +/// +/// StkMapRecord[NumRecords] { +/// uint64 : PatchPoint ID +/// uint32 : Instruction Offset +/// uint16 : Reserved (record flags) +/// uint16 : NumLocations +/// Location[NumLocations] { +/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex +/// uint8 : Size in Bytes +/// uint16 : Dwarf RegNum +/// int32 : Offset +/// } +/// uint16 : Padding +/// uint16 : NumLiveOuts +/// LiveOuts[NumLiveOuts] { +/// uint16 : Dwarf RegNum +/// uint8 : Reserved +/// uint8 : Size in Bytes +/// } +/// uint32 : Padding (only if required to align to 8 byte) +/// } +/// +/// Location Encoding, Type, Value: +/// 0x1, Register, Reg (value in register) +/// 0x2, Direct, Reg + Offset (frame index) +/// 0x3, Indirect, [Reg + Offset] (spilled value) +/// 0x4, Constant, Offset (small constant) +/// 0x5, ConstIndex, Constants[Offset] (large constant) +void StackMaps::emitCallsiteEntries(MCStreamer &OS) { + DEBUG(print(dbgs())); + // Callsite entries. + for (const auto &CSI : CSInfos) { + const LocationVec &CSLocs = CSI.Locations; + const LiveOutVec &LiveOuts = CSI.LiveOuts; + + // Verify stack map entry. 
It's better to communicate a problem to the + // runtime than crash in case of in-process compilation. Currently, we do + // simple overflow checks, but we may eventually communicate other + // compilation errors this way. + if (CSLocs.size() > UINT16_MAX || LiveOuts.size() > UINT16_MAX) { + OS.EmitIntValue(UINT64_MAX, 8); // Invalid ID. + OS.EmitValue(CSI.CSOffsetExpr, 4); + OS.EmitIntValue(0, 2); // Reserved. + OS.EmitIntValue(0, 2); // 0 locations. + OS.EmitIntValue(0, 2); // padding. + OS.EmitIntValue(0, 2); // 0 live-out registers. + OS.EmitIntValue(0, 4); // padding. + continue; + } + + OS.EmitIntValue(CSI.ID, 8); + OS.EmitValue(CSI.CSOffsetExpr, 4); + + // Reserved for flags. + OS.EmitIntValue(0, 2); + OS.EmitIntValue(CSLocs.size(), 2); + + for (const auto &Loc : CSLocs) { + OS.EmitIntValue(Loc.Type, 1); + OS.EmitIntValue(Loc.Size, 1); + OS.EmitIntValue(Loc.Reg, 2); + OS.EmitIntValue(Loc.Offset, 4); + } + + // Num live-out registers and padding to align to 4 byte. + OS.EmitIntValue(0, 2); + OS.EmitIntValue(LiveOuts.size(), 2); + + for (const auto &LO : LiveOuts) { + OS.EmitIntValue(LO.DwarfRegNum, 2); + OS.EmitIntValue(0, 1); + OS.EmitIntValue(LO.Size, 1); + } + // Emit alignment to 8 byte. + OS.EmitValueToAlignment(8); + } +} + +/// Serialize the stackmap data. +void StackMaps::serializeToStackMapSection() { + (void)WSMP; + // Bail out if there's no stack map data. + assert((!CSInfos.empty() || (CSInfos.empty() && ConstPool.empty())) && + "Expected empty constant pool too!"); + assert((!CSInfos.empty() || (CSInfos.empty() && FnStackSize.empty())) && + "Expected empty function record too!"); + if (CSInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer->getContext(); + MCStreamer &OS = *AP.OutStreamer; + + // Create the section. + MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); + OS.SwitchSection(StackMapSection); + + // Emit a dummy symbol to force section inclusion. + OS.EmitLabel(OutContext.getOrCreateSymbol(Twine("__LLVM_StackMaps"))); + + // Serialize data. + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + emitStackmapHeader(OS); + emitFunctionFrameRecords(OS); + emitConstantPoolEntries(OS); + emitCallsiteEntries(OS); + OS.AddBlankLine(); + + // Clean up. + CSInfos.clear(); + ConstPool.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp new file mode 100644 index 0000000..db3fef5 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -0,0 +1,493 @@ +//===-- StackProtector.cpp - Stack Protector Insertion --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass inserts stack protectors into functions which need them. A variable +// with a random value in it is stored onto the stack before the local variables +// are allocated. Upon exiting the block, the stored value is checked. If it's +// changed, then there was some sort of violation and the program aborts. 
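+//
+// Schematically (IR-level sketch):
+//
+//   entry:
+//     %StackGuardSlot = alloca i8*
+//     call void @llvm.stackprotector(i8* <guard>, i8** %StackGuardSlot)
+//     ...
+//   return block:
+//     <compare the stored slot value against the guard>
+//     br i1 %ok, label %SP_return, label %CallStackCheckFailBlk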
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cstdlib> +using namespace llvm; + +#define DEBUG_TYPE "stack-protector" + +STATISTIC(NumFunProtected, "Number of functions protected"); +STATISTIC(NumAddrTaken, "Number of local variables that have their address" + " taken."); + +static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp", + cl::init(true), cl::Hidden); + +char StackProtector::ID = 0; +INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", + false, true) + +FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { + return new StackProtector(TM); +} + +StackProtector::SSPLayoutKind +StackProtector::getSSPLayout(const AllocaInst *AI) const { + return AI ? Layout.lookup(AI) : SSPLK_None; +} + +void StackProtector::adjustForColoring(const AllocaInst *From, + const AllocaInst *To) { + // When coloring replaces one alloca with another, transfer the SSPLayoutKind + // tag from the remapped to the target alloca. The remapped alloca should + // have a size smaller than or equal to the replacement alloca. + SSPLayoutMap::iterator I = Layout.find(From); + if (I != Layout.end()) { + SSPLayoutKind Kind = I->second; + Layout.erase(I); + + // Transfer the tag, but make sure that SSPLK_AddrOf does not overwrite + // SSPLK_SmallArray or SSPLK_LargeArray, and make sure that + // SSPLK_SmallArray does not overwrite SSPLK_LargeArray. + I = Layout.find(To); + if (I == Layout.end()) + Layout.insert(std::make_pair(To, Kind)); + else if (I->second != SSPLK_LargeArray && Kind != SSPLK_AddrOf) + I->second = Kind; + } +} + +bool StackProtector::runOnFunction(Function &Fn) { + F = &Fn; + M = F->getParent(); + DominatorTreeWrapperPass *DTWP = + getAnalysisIfAvailable<DominatorTreeWrapperPass>(); + DT = DTWP ? &DTWP->getDomTree() : nullptr; + TLI = TM->getSubtargetImpl(Fn)->getTargetLowering(); + + Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size"); + if (Attr.isStringAttribute() && + Attr.getValueAsString().getAsInteger(10, SSPBufferSize)) + return false; // Invalid integer string + + if (!RequiresStackProtector()) + return false; + + ++NumFunProtected; + return InsertStackProtectors(); +} + +/// \param [out] IsLarge is set to true if a protectable array is found and +/// it is "large" ( >= ssp-buffer-size). In the case of a structure with +/// multiple arrays, this gets set if any of them is large. 
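+/// For example, with an ssp-buffer-size of 8 (the usual default), a
+/// 'char buf[64]' member is reported as a large protectable array, while a
+/// 'char buf[4]' member is only considered protectable in strong mode.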
+bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, + bool Strong, + bool InStruct) const { + if (!Ty) + return false; + if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + if (!AT->getElementType()->isIntegerTy(8)) { + // If we're on a non-Darwin platform or we're inside of a structure, don't + // add stack protectors unless the array is a character array. + // However, in strong mode any array, regardless of type and size, + // triggers a protector. + if (!Strong && (InStruct || !Trip.isOSDarwin())) + return false; + } + + // If an array has more than SSPBufferSize bytes of allocated space, then we + // emit stack protectors. + if (SSPBufferSize <= M->getDataLayout().getTypeAllocSize(AT)) { + IsLarge = true; + return true; + } + + if (Strong) + // Require a protector for all arrays in strong mode + return true; + } + + const StructType *ST = dyn_cast<StructType>(Ty); + if (!ST) + return false; + + bool NeedsProtector = false; + for (StructType::element_iterator I = ST->element_begin(), + E = ST->element_end(); + I != E; ++I) + if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { + // If the element is a protectable array and is large (>= SSPBufferSize) + // then we are done. If the protectable array is not large, then + // keep looking in case a subsequent element is a large array. + if (IsLarge) + return true; + NeedsProtector = true; + } + + return NeedsProtector; +} + +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (const User *U : AI->users()) { + if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { + if (AI == SI->getValueOperand()) + return true; + } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) { + if (AI == SI->getOperand(0)) + return true; + } else if (isa<CallInst>(U)) { + return true; + } else if (isa<InvokeInst>(U)) { + return true; + } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) { + if (HasAddressTaken(SI)) + return true; + } else if (const PHINode *PN = dyn_cast<PHINode>(U)) { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + if (VisitedPHIs.insert(PN).second) + if (HasAddressTaken(PN)) + return true; + } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { + if (HasAddressTaken(GEP)) + return true; + } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) { + if (HasAddressTaken(BI)) + return true; + } + } + return false; +} + +/// \brief Check whether or not this function needs a stack protector based +/// upon the stack protector level. +/// +/// We use two heuristics: a standard (ssp) and strong (sspstrong). +/// The standard heuristic which will add a guard variable to functions that +/// call alloca with a either a variable size or a size >= SSPBufferSize, +/// functions with character buffers larger than SSPBufferSize, and functions +/// with aggregates containing character buffers larger than SSPBufferSize. The +/// strong heuristic will add a guard variables to functions that call alloca +/// regardless of size, functions with any buffer regardless of type and size, +/// functions with aggregates that contain any buffer regardless of type and +/// size, and functions that contain stack-based variables that have had their +/// address taken. 
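+///
+/// Illustrative examples (assuming the default buffer-size threshold):
+///   char big[64];        // protected under ssp and sspstrong
+///   char small[4];       // protected only under sspstrong
+///   int x; escape(&x);   // protected only under sspstrong (address taken)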
+bool StackProtector::RequiresStackProtector() { + bool Strong = false; + bool NeedsProtector = false; + if (F->hasFnAttribute(Attribute::StackProtectReq)) { + NeedsProtector = true; + Strong = true; // Use the same heuristic as strong to determine SSPLayout + } else if (F->hasFnAttribute(Attribute::StackProtectStrong)) + Strong = true; + else if (!F->hasFnAttribute(Attribute::StackProtect)) + return false; + + for (const BasicBlock &BB : *F) { + for (const Instruction &I : BB) { + if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) { + if (AI->isArrayAllocation()) { + // SSP-Strong: Enable protectors for any call to alloca, regardless + // of size. + if (Strong) + return true; + + if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) { + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { + // A call to alloca with size >= SSPBufferSize requires + // stack protectors. + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } else if (Strong) { + // Require protectors for all alloca calls in strong mode. + Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); + NeedsProtector = true; + } + } else { + // A call to alloca with a variable size requires protectors. + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } + continue; + } + + bool IsLarge = false; + if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { + Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray + : SSPLK_SmallArray)); + NeedsProtector = true; + continue; + } + + if (Strong && HasAddressTaken(AI)) { + ++NumAddrTaken; + Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); + NeedsProtector = true; + } + } + } + } + + return NeedsProtector; +} + +static bool InstructionWillNotHaveChain(const Instruction *I) { + return !I->mayHaveSideEffects() && !I->mayReadFromMemory() && + isSafeToSpeculativelyExecute(I); +} + +/// Identify if RI has a previous instruction in the "Tail Position" and return +/// it. Otherwise return 0. +/// +/// This is based off of the code in llvm::isInTailCallPosition. The difference +/// is that it inverts the first part of llvm::isInTailCallPosition since +/// isInTailCallPosition is checking if a call is in a tail call position, and +/// we are searching for an unknown tail call that might be in the tail call +/// position. Once we find the call though, the code uses the same refactored +/// code, returnTypeIsEligibleForTailCall. +static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, + const TargetLoweringBase *TLI) { + // Establish a reasonable upper bound on the maximum amount of instructions we + // will look through to find a tail call. + unsigned SearchCounter = 0; + const unsigned MaxSearch = 4; + bool NoInterposingChain = true; + + for (BasicBlock::reverse_iterator I = std::next(BB->rbegin()), E = BB->rend(); + I != E && SearchCounter < MaxSearch; ++I) { + Instruction *Inst = &*I; + + // Skip over debug intrinsics and do not allow them to affect our MaxSearch + // counter. + if (isa<DbgInfoIntrinsic>(Inst)) + continue; + + // If we find a call and the following conditions are satisifed, then we + // have found a tail call that satisfies at least the target independent + // requirements of a tail call: + // + // 1. The call site has the tail marker. + // + // 2. The call site either will not cause the creation of a chain or if a + // chain is necessary there are no instructions in between the callsite and + // the call which would create an interposing chain. + // + // 3. 
The return type of the function does not impede tail call + // optimization. + if (CallInst *CI = dyn_cast<CallInst>(Inst)) { + if (CI->isTailCall() && + (InstructionWillNotHaveChain(CI) || NoInterposingChain) && + returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI)) + return CI; + } + + // If we did not find a call see if we have an instruction that may create + // an interposing chain. + NoInterposingChain = + NoInterposingChain && InstructionWillNotHaveChain(Inst); + + // Increment max search. + SearchCounter++; + } + + return nullptr; +} + +/// Insert code into the entry block that stores the __stack_chk_guard +/// variable onto the stack: +/// +/// entry: +/// StackGuardSlot = alloca i8* +/// StackGuard = load __stack_chk_guard +/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// +/// Returns true if the platform/triple supports the stackprotectorcreate pseudo +/// node. +static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, + const TargetLoweringBase *TLI, const Triple &TT, + AllocaInst *&AI, Value *&StackGuardVar) { + bool SupportsSelectionDAGSP = false; + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = + ConstantExpr::getIntToPtr(OffsetVal, PointerType::get(PtrTy, + AddressSpace)); + } else if (TT.isOSOpenBSD()) { + StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); + cast<GlobalValue>(StackGuardVar) + ->setVisibility(GlobalValue::HiddenVisibility); + } else { + SupportsSelectionDAGSP = true; + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + IRBuilder<> B(&F->getEntryBlock().front()); + AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot"); + LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); + B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), + {LI, AI}); + + return SupportsSelectionDAGSP; +} + +/// InsertStackProtectors - Insert code into the prologue and epilogue of the +/// function. +/// +/// - The prologue code loads and stores the stack guard onto the stack. +/// - The epilogue checks the value stored in the prologue against the original +/// value. It calls __stack_chk_fail if they differ. +bool StackProtector::InsertStackProtectors() { + bool HasPrologue = false; + bool SupportsSelectionDAGSP = + EnableSelectionDAGSP && !TM->Options.EnableFastISel; + AllocaInst *AI = nullptr; // Place on stack that stores the stack guard. + Value *StackGuardVar = nullptr; // The stack guard variable. + + for (Function::iterator I = F->begin(), E = F->end(); I != E;) { + BasicBlock *BB = &*I++; + ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()); + if (!RI) + continue; + + if (!HasPrologue) { + HasPrologue = true; + SupportsSelectionDAGSP &= + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); + } + + if (SupportsSelectionDAGSP) { + // Since we have a potential tail call, insert the special stack check + // intrinsic. + Instruction *InsertionPt = nullptr; + if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { + InsertionPt = CI; + } else { + InsertionPt = RI; + // At this point we know that BB has a return statement so it *DOES* + // have a terminator. 
+ assert(InsertionPt != nullptr && + "BB must have a terminator instruction at this point."); + } + + Function *Intrinsic = + Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); + CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); + } else { + // If we do not support SelectionDAG based tail calls, generate IR level + // tail calls. + // + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Create the FailBB. We duplicate the BB every time since the MI tail + // merge pass will merge together all of the various BB into one including + // fail BB generated by the stack protector pseudo instruction. + BasicBlock *FailBB = CreateFailBB(); + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return"); + + // Update the dominator tree if we need to. + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + DT->addNewBlock(FailBB, BB); + } + + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); + + // Move the newly created basic block to the point right after the old + // basic block so that it's in the "fall through" position. + NewBB->moveAfter(BB); + + // Generate the stack protector instructions in the old basic block. + IRBuilder<> B(BB); + LoadInst *LI1 = B.CreateLoad(StackGuardVar); + LoadInst *LI2 = B.CreateLoad(AI); + Value *Cmp = B.CreateICmpEQ(LI1, LI2); + auto SuccessProb = + BranchProbabilityInfo::getBranchProbStackProtector(true); + auto FailureProb = + BranchProbabilityInfo::getBranchProbStackProtector(false); + MDNode *Weights = MDBuilder(F->getContext()) + .createBranchWeights(SuccessProb.getNumerator(), + FailureProb.getNumerator()); + B.CreateCondBr(Cmp, NewBB, FailBB, Weights); + } + } + + // Return if we didn't modify any basic blocks. i.e., there are no return + // statements in the function. + return HasPrologue; +} + +/// CreateFailBB - Create a basic block to jump to when the stack protector +/// check fails. +BasicBlock *StackProtector::CreateFailBB() { + LLVMContext &Context = F->getContext(); + BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); + IRBuilder<> B(FailBB); + if (Trip.isOSOpenBSD()) { + Constant *StackChkFail = + M->getOrInsertFunction("__stack_smash_handler", + Type::getVoidTy(Context), + Type::getInt8PtrTy(Context), nullptr); + + B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); + } else { + Constant *StackChkFail = + M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context), + nullptr); + B.CreateCall(StackChkFail, {}); + } + B.CreateUnreachable(); + return FailBB; +} diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp new file mode 100644 index 0000000..51f4d0e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -0,0 +1,470 @@ +//===-- StackSlotColoring.cpp - Stack slot coloring pass. -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the stack slot coloring pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <vector>
+using namespace llvm;
+
+#define DEBUG_TYPE "stackslotcoloring"
+
+static cl::opt<bool>
+DisableSharing("no-stack-slot-sharing",
+               cl::init(false), cl::Hidden,
+               cl::desc("Suppress slot sharing during stack coloring"));
+
+static cl::opt<int> DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden);
+
+STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring");
+STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated");
+
+namespace {
+  class StackSlotColoring : public MachineFunctionPass {
+    LiveStacks* LS;
+    MachineFrameInfo *MFI;
+    const TargetInstrInfo *TII;
+    const MachineBlockFrequencyInfo *MBFI;
+
+    // SSIntervals - Spill slot intervals.
+    std::vector<LiveInterval*> SSIntervals;
+
+    // SSRefs - Keep a list of MachineMemOperands for each spill slot.
+    // MachineMemOperands can be shared between instructions, so we need
+    // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not
+    // become FI0 -> FI1 -> FI2.
+    SmallVector<SmallVector<MachineMemOperand *, 8>, 16> SSRefs;
+
+    // OrigAlignments - Alignments of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigAlignments;
+
+    // OrigSizes - Sizes of stack objects before coloring.
+    SmallVector<unsigned, 16> OrigSizes;
+
+    // AllColors - If index is set, it's a spill slot, i.e. color.
+    // FIXME: This assumes PEI locates spill slots with smaller indices
+    // closer to the stack pointer / frame pointer. Therefore, smaller
+    // index == better color.
+    BitVector AllColors;
+
+    // NextColor - Next "color" that's not yet used.
+    int NextColor;
+
+    // UsedColors - "Colors" that have been assigned.
+    BitVector UsedColors;
+
+    // Assignments - Color to intervals mapping.
+ SmallVector<SmallVector<LiveInterval*,4>, 16> Assignments; + + public: + static char ID; // Pass identification + StackSlotColoring() : + MachineFunctionPass(ID), NextColor(-1) { + initializeStackSlotColoringPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveStacks>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + AU.addPreserved<MachineBlockFrequencyInfo>(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + private: + void InitializeSlots(); + void ScanForSpillSlotRefs(MachineFunction &MF); + bool OverlapWithAssignments(LiveInterval *li, int Color) const; + int ColorSlot(LiveInterval *li); + bool ColorSlots(MachineFunction &MF); + void RewriteInstruction(MachineInstr *MI, SmallVectorImpl<int> &SlotMapping, + MachineFunction &MF); + bool RemoveDeadStores(MachineBasicBlock* MBB); + }; +} // end anonymous namespace + +char StackSlotColoring::ID = 0; +char &llvm::StackSlotColoringID = StackSlotColoring::ID; + +INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) +INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring", + "Stack Slot Coloring", false, false) + +namespace { + // IntervalSorter - Comparison predicate that sort live intervals by + // their weight. + struct IntervalSorter { + bool operator()(LiveInterval* LHS, LiveInterval* RHS) const { + return LHS->weight > RHS->weight; + } + }; +} + +/// ScanForSpillSlotRefs - Scan all the machine instructions for spill slot +/// references and update spill slot weights. +void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { + SSRefs.resize(MFI->getObjectIndexEnd()); + + // FIXME: Need the equivalent of MachineRegisterInfo for frameindex operands. + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) { + MachineInstr *MI = &*MII; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int FI = MO.getIndex(); + if (FI < 0) + continue; + if (!LS->hasInterval(FI)) + continue; + LiveInterval &li = LS->getInterval(FI); + if (!MI->isDebugValue()) + li.weight += LiveIntervals::getSpillWeight(false, true, MBFI, MI); + } + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + MachineMemOperand *MMO = *MMOI; + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast_or_null<FixedStackPseudoSourceValue>( + MMO->getPseudoValue())) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); + } + } + } + } +} + +/// InitializeSlots - Process all spill stack slot liveintervals and add them +/// to a sorted (by weight) list. 
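The ordering that InitializeSlots establishes can be shown in isolation: walk the slots in a deterministic (frame-index) order, then stable-sort by decreasing weight so the hottest slots receive colors first. A minimal sketch, with SlotInfo standing in as an illustrative substitute for a spill-slot LiveInterval:

#include <algorithm>
#include <vector>

struct SlotInfo {        // stand-in for a spill-slot live interval
  int FrameIndex;
  float Weight;          // accumulated spill weight
};

// Deterministic frame-index order first, then hottest-first; the stable sort
// keeps the frame-index order among slots of equal weight.
static void orderSlots(std::vector<SlotInfo> &Slots) {
  std::sort(Slots.begin(), Slots.end(),
            [](const SlotInfo &L, const SlotInfo &R) {
              return L.FrameIndex < R.FrameIndex;
            });
  std::stable_sort(Slots.begin(), Slots.end(),
                   [](const SlotInfo &L, const SlotInfo &R) {
                     return L.Weight > R.Weight;
                   });
}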
+void StackSlotColoring::InitializeSlots() { + int LastFI = MFI->getObjectIndexEnd(); + OrigAlignments.resize(LastFI); + OrigSizes.resize(LastFI); + AllColors.resize(LastFI); + UsedColors.resize(LastFI); + Assignments.resize(LastFI); + + typedef std::iterator_traits<LiveStacks::iterator>::value_type Pair; + SmallVector<Pair *, 16> Intervals; + Intervals.reserve(LS->getNumIntervals()); + for (auto &I : *LS) + Intervals.push_back(&I); + std::sort(Intervals.begin(), Intervals.end(), + [](Pair *LHS, Pair *RHS) { return LHS->first < RHS->first; }); + + // Gather all spill slots into a list. + DEBUG(dbgs() << "Spill slot intervals:\n"); + for (auto *I : Intervals) { + LiveInterval &li = I->second; + DEBUG(li.dump()); + int FI = TargetRegisterInfo::stackSlot2Index(li.reg); + if (MFI->isDeadObjectIndex(FI)) + continue; + SSIntervals.push_back(&li); + OrigAlignments[FI] = MFI->getObjectAlignment(FI); + OrigSizes[FI] = MFI->getObjectSize(FI); + AllColors.set(FI); + } + DEBUG(dbgs() << '\n'); + + // Sort them by weight. + std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + + // Get first "color". + NextColor = AllColors.find_first(); +} + +/// OverlapWithAssignments - Return true if LiveInterval overlaps with any +/// LiveIntervals that have already been assigned to the specified color. +bool +StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { + const SmallVectorImpl<LiveInterval *> &OtherLIs = Assignments[Color]; + for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { + LiveInterval *OtherLI = OtherLIs[i]; + if (OtherLI->overlaps(*li)) + return true; + } + return false; +} + +/// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. +/// +int StackSlotColoring::ColorSlot(LiveInterval *li) { + int Color = -1; + bool Share = false; + if (!DisableSharing) { + // Check if it's possible to reuse any of the used colors. + Color = UsedColors.find_first(); + while (Color != -1) { + if (!OverlapWithAssignments(li, Color)) { + Share = true; + ++NumEliminated; + break; + } + Color = UsedColors.find_next(Color); + } + } + + // Assign it to the first available color (assumed to be the best) if it's + // not possible to share a used color with other objects. + if (!Share) { + assert(NextColor != -1 && "No more spill slots?"); + Color = NextColor; + UsedColors.set(Color); + NextColor = AllColors.find_next(NextColor); + } + + // Record the assignment. + Assignments[Color].push_back(li); + int FI = TargetRegisterInfo::stackSlot2Index(li->reg); + DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n"); + + // Change size and alignment of the allocated slot. If there are multiple + // objects sharing the same slot, then make sure the size and alignment + // are large enough for all. + unsigned Align = OrigAlignments[FI]; + if (!Share || Align > MFI->getObjectAlignment(Color)) + MFI->setObjectAlignment(Color, Align); + int64_t Size = OrigSizes[FI]; + if (!Share || Size > MFI->getObjectSize(Color)) + MFI->setObjectSize(Color, Size); + return Color; +} + +/// Colorslots - Color all spill stack slots and rewrite all frameindex machine +/// operands in the function. 
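The coloring itself is a greedy scheme: reuse the first already-used color whose assigned intervals do not overlap the current one, otherwise open a fresh color. Below is a self-contained sketch over plain half-open ranges; Range and pickColor are illustrative names, and the real ColorSlot additionally grows the chosen slot's size and alignment to cover everything that shares it.

#include <vector>

struct Range { unsigned Start, End; };   // half-open [Start, End)

static bool overlaps(const Range &A, const Range &B) {
  return A.Start < B.End && B.Start < A.End;
}

// Greedy color assignment in the spirit of ColorSlot: reuse the first
// compatible used color, otherwise open a new one.
static unsigned pickColor(const Range &R,
                          std::vector<std::vector<Range>> &Assignments) {
  for (unsigned Color = 0; Color < Assignments.size(); ++Color) {
    bool Clash = false;
    for (const Range &Other : Assignments[Color])
      if (overlaps(R, Other)) { Clash = true; break; }
    if (!Clash) {
      Assignments[Color].push_back(R);   // shared an existing slot
      return Color;
    }
  }
  Assignments.push_back({R});            // no used color fits: new slot
  return Assignments.size() - 1;
}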
+bool StackSlotColoring::ColorSlots(MachineFunction &MF) { + unsigned NumObjs = MFI->getObjectIndexEnd(); + SmallVector<int, 16> SlotMapping(NumObjs, -1); + SmallVector<float, 16> SlotWeights(NumObjs, 0.0); + SmallVector<SmallVector<int, 4>, 16> RevMap(NumObjs); + BitVector UsedColors(NumObjs); + + DEBUG(dbgs() << "Color spill slot intervals:\n"); + bool Changed = false; + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + int NewSS = ColorSlot(li); + assert(NewSS >= 0 && "Stack coloring failed?"); + SlotMapping[SS] = NewSS; + RevMap[NewSS].push_back(SS); + SlotWeights[NewSS] += li->weight; + UsedColors.set(NewSS); + Changed |= (SS != NewSS); + } + + DEBUG(dbgs() << "\nSpill slots after coloring:\n"); + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { + LiveInterval *li = SSIntervals[i]; + int SS = TargetRegisterInfo::stackSlot2Index(li->reg); + li->weight = SlotWeights[SS]; + } + // Sort them by new weight. + std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter()); + +#ifndef NDEBUG + for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) + DEBUG(SSIntervals[i]->dump()); + DEBUG(dbgs() << '\n'); +#endif + + if (!Changed) + return false; + + // Rewrite all MachineMemOperands. + for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { + int NewFI = SlotMapping[SS]; + if (NewFI == -1 || (NewFI == (int)SS)) + continue; + + const PseudoSourceValue *NewSV = MF.getPSVManager().getFixedStack(NewFI); + SmallVectorImpl<MachineMemOperand *> &RefMMOs = SSRefs[SS]; + for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) + RefMMOs[i]->setValue(NewSV); + } + + // Rewrite all MO_FrameIndex operands. Look for dead stores. + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) + RewriteInstruction(MII, SlotMapping, MF); + RemoveDeadStores(MBB); + } + + // Delete unused stack slots. + while (NextColor != -1) { + DEBUG(dbgs() << "Removing unused stack object fi#" << NextColor << "\n"); + MFI->RemoveStackObject(NextColor); + NextColor = AllColors.find_next(NextColor); + } + + return true; +} + +/// RewriteInstruction - Rewrite specified instruction by replacing references +/// to old frame index with new one. +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, + SmallVectorImpl<int> &SlotMapping, + MachineFunction &MF) { + // Update the operands. + for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isFI()) + continue; + int OldFI = MO.getIndex(); + if (OldFI < 0) + continue; + int NewFI = SlotMapping[OldFI]; + if (NewFI == -1 || NewFI == OldFI) + continue; + MO.setIndex(NewFI); + } + + // The MachineMemOperands have already been updated. +} + + +/// RemoveDeadStores - Scan through a basic block and look for loads followed +/// by stores. If they're both using the same stack slot, then the store is +/// definitely dead. This could obviously be much more aggressive (consider +/// pairs with instructions between them), but such extensions might have a +/// considerable compile time impact. +bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { + // FIXME: This could be much more aggressive, but we need to investigate + // the compile time impact of doing so. 
+ bool changed = false; + + SmallVector<MachineInstr*, 4> toErase; + + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (DCELimit != -1 && (int)NumDead >= DCELimit) + break; + + int FirstSS, SecondSS; + if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && + FirstSS == SecondSS && + FirstSS != -1) { + ++NumDead; + changed = true; + toErase.push_back(I); + continue; + } + + MachineBasicBlock::iterator NextMI = std::next(I); + if (NextMI == MBB->end()) continue; + + unsigned LoadReg = 0; + unsigned StoreReg = 0; + if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; + if (!(StoreReg = TII->isStoreToStackSlot(NextMI, SecondSS))) continue; + if (FirstSS != SecondSS || LoadReg != StoreReg || FirstSS == -1) continue; + + ++NumDead; + changed = true; + + if (NextMI->findRegisterUseOperandIdx(LoadReg, true, nullptr) != -1) { + ++NumDead; + toErase.push_back(I); + } + + toErase.push_back(NextMI); + ++I; + } + + for (SmallVectorImpl<MachineInstr *>::iterator I = toErase.begin(), + E = toErase.end(); I != E; ++I) + (*I)->eraseFromParent(); + + return changed; +} + + +bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Stack Slot Coloring **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + MFI = MF.getFrameInfo(); + TII = MF.getSubtarget().getInstrInfo(); + LS = &getAnalysis<LiveStacks>(); + MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); + + bool Changed = false; + + unsigned NumSlots = LS->getNumIntervals(); + if (NumSlots == 0) + // Nothing to do! + return false; + + // If there are calls to setjmp or sigsetjmp, don't perform stack slot + // coloring. The stack could be modified before the longjmp is executed, + // resulting in the wrong value being used afterwards. (See + // <rdar://problem/8007500>.) + if (MF.exposesReturnsTwice()) + return false; + + // Gather spill slot references + ScanForSpillSlotRefs(MF); + InitializeSlots(); + Changed = ColorSlots(MF); + + NextColor = -1; + SSIntervals.clear(); + for (unsigned i = 0, e = SSRefs.size(); i != e; ++i) + SSRefs[i].clear(); + SSRefs.clear(); + OrigAlignments.clear(); + OrigSizes.clear(); + AllColors.clear(); + UsedColors.clear(); + for (unsigned i = 0, e = Assignments.size(); i != e; ++i) + Assignments[i].clear(); + Assignments.clear(); + + return Changed; +} diff --git a/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp new file mode 100644 index 0000000..3f60e18 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/StatepointExampleGC.cpp @@ -0,0 +1,55 @@ +//===-- StatepointDefaultGC.cpp - The default statepoint GC strategy ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a GCStrategy which serves as an example for the usage +// of a statepoint based lowering strategy. This GCStrategy is intended to +// suitable as a default implementation usable with any collector which can +// consume the standard stackmap format generated by statepoints, uses the +// default addrespace to distinguish between gc managed and non-gc managed +// pointers, and has reasonable relocation semantics. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +namespace { +class StatepointGC : public GCStrategy { +public: + StatepointGC() { + UseStatepoints = true; + // These options are all gc.root specific, we specify them so that the + // gc.root lowering code doesn't run. + InitRoots = false; + NeededSafePoints = 0; + UsesMetadata = false; + CustomRoots = false; + } + Optional<bool> isGCManagedPointer(const Type *Ty) const override { + // Method is only valid on pointer typed values. + const PointerType *PT = cast<PointerType>(Ty); + // For the sake of this example GC, we arbitrarily pick addrspace(1) as our + // GC managed heap. We know that a pointer into this heap needs to be + // updated and that no other pointer does. Note that addrspace(1) is used + // only as an example, it has no special meaning, and is not reserved for + // GC usage. + return (1 == PT->getAddressSpace()); + } +}; +} + +static GCRegistry::Add<StatepointGC> X("statepoint-example", + "an example strategy for statepoint"); + +namespace llvm { +void linkStatepointExampleGC() {} +} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp new file mode 100644 index 0000000..d2fbf53 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -0,0 +1,988 @@ +//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass duplicates basic blocks ending in unconditional branches into +// the tails of their predecessors. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "tailduplication" + +STATISTIC(NumTails , "Number of tails duplicated"); +STATISTIC(NumTailDups , "Number of tail duplicated blocks"); +STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); +STATISTIC(NumAddedPHIs , "Number of phis added"); + +// Heuristic for tail duplication. 
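The -tail-dup-size option declared next feeds the profitability check later in this file: roughly, the duplication budget is one instruction when optimizing for size, the option's value otherwise, and a much larger allowance for indirect-branch tails before register allocation. A simplified sketch of that selection (the function and its parameters are illustrative only, not part of the pass):

// Simplified model of the instruction budget chosen in shouldTailDuplicate.
static unsigned chooseDuplicationLimit(bool OptForSize,
                                       bool TailEndsInIndirectBr,
                                       bool PreRegAlloc,
                                       unsigned TailDupSizeOpt) {
  // When optimizing for size, duplicating one instruction can still pay off:
  // it replaces the branch that would otherwise be needed.
  unsigned Limit = OptForSize ? 1 : TailDupSizeOpt;

  // Indirect branches become far more predictable when each predecessor gets
  // its own copy, so allow a much larger tail before register allocation.
  if (TailEndsInIndirectBr && PreRegAlloc)
    Limit = 20;

  return Limit;
}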
+static cl::opt<unsigned> +TailDuplicateSize("tail-dup-size", + cl::desc("Maximum instructions to consider tail duplicating"), + cl::init(2), cl::Hidden); + +static cl::opt<bool> +TailDupVerify("tail-dup-verify", + cl::desc("Verify sanity of PHI instructions during taildup"), + cl::init(false), cl::Hidden); + +static cl::opt<unsigned> +TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); + +typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; + +namespace { + /// Perform tail duplication. + class TailDuplicatePass : public MachineFunctionPass { + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const MachineBranchProbabilityInfo *MBPI; + MachineModuleInfo *MMI; + MachineRegisterInfo *MRI; + std::unique_ptr<RegScavenger> RS; + bool PreRegAlloc; + + // A list of virtual registers for which to update SSA form. + SmallVector<unsigned, 16> SSAUpdateVRs; + + // For each virtual register in SSAUpdateVals keep a list of source virtual + // registers. + DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; + + public: + static char ID; + explicit TailDuplicatePass() : + MachineFunctionPass(ID), PreRegAlloc(false) {} + + bool runOnMachineFunction(MachineFunction &MF) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + private: + void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB); + void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned,unsigned> > &Copies, + const DenseSet<unsigned> &UsedByPhi, + bool Remove); + void DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi); + void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallSetVector<MachineBasicBlock*, 8> &Succs); + bool TailDuplicateBlocks(MachineFunction &MF); + bool shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, MachineBasicBlock &TailBB); + bool isSimpleBB(MachineBasicBlock *TailBB); + bool canCompletelyDuplicateBB(MachineBasicBlock &BB); + bool duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &RegsUsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies); + bool TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies); + bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF); + + void RemoveDeadBlock(MachineBasicBlock *MBB); + }; + + char TailDuplicatePass::ID = 0; +} + +char &llvm::TailDuplicateID = TailDuplicatePass::ID; + +INITIALIZE_PASS(TailDuplicatePass, "tailduplication", "Tail Duplication", + false, false) + +bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + if (skipOptnoneFunction(*MF.getFunction())) + return false; + + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + MRI = &MF.getRegInfo(); + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); + + PreRegAlloc = MRI->isSSA(); + RS.reset(); + if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) + RS.reset(new RegScavenger()); + + bool MadeChange = false; + while (TailDuplicateBlocks(MF)) + MadeChange = true; + + 
return MadeChange; +} + +void TailDuplicatePass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<MachineBranchProbabilityInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *MBB = &*I; + SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), + MBB->pred_end()); + MachineBasicBlock::iterator MI = MBB->begin(); + while (MI != MBB->end()) { + if (!MI->isPHI()) + break; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + bool Found = false; + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (PHIBB == PredBB) { + Found = true; + break; + } + } + if (!Found) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " missing input from predecessor BB#" + << PredBB->getNumber() << '\n'; + llvm_unreachable(nullptr); + } + } + + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { + MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); + if (CheckExtra && !Preds.count(PHIBB)) { + dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() + << ": " << *MI; + dbgs() << " extra input from predecessor BB#" + << PHIBB->getNumber() << '\n'; + llvm_unreachable(nullptr); + } + if (PHIBB->getNumber() < 0) { + dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; + dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; + llvm_unreachable(nullptr); + } + } + ++MI; + } + } +} + +/// Tail duplicate the block and cleanup. +bool +TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB, + bool IsSimple, + MachineFunction &MF) { + // Save the successors list. + SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), + MBB->succ_end()); + + SmallVector<MachineBasicBlock*, 8> TDBBs; + SmallVector<MachineInstr*, 16> Copies; + if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies)) + return false; + + ++NumTails; + + SmallVector<MachineInstr*, 8> NewPHIs; + MachineSSAUpdater SSAUpdate(MF, &NewPHIs); + + // TailBB's immediate successors are now successors of those predecessors + // which duplicated TailBB. Add the predecessors as sources to the PHI + // instructions. + bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); + if (PreRegAlloc) + UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); + + // If it is dead, remove it. + if (isDead) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + ++NumDeadBlocks; + } + + // Update SSA form. + if (!SSAUpdateVRs.empty()) { + for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { + unsigned VReg = SSAUpdateVRs[i]; + SSAUpdate.Initialize(VReg); + + // If the original definition is still around, add it as an available + // value. + MachineInstr *DefMI = MRI->getVRegDef(VReg); + MachineBasicBlock *DefBB = nullptr; + if (DefMI) { + DefBB = DefMI->getParent(); + SSAUpdate.AddAvailableValue(DefBB, VReg); + } + + // Add the new vregs as available values. + DenseMap<unsigned, AvailableValsTy>::iterator LI = + SSAUpdateVals.find(VReg); + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + unsigned SrcReg = LI->second[j].second; + SSAUpdate.AddAvailableValue(SrcBB, SrcReg); + } + + // Rewrite uses that are outside of the original def's block. 
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); + while (UI != MRI->use_end()) { + MachineOperand &UseMO = *UI; + MachineInstr *UseMI = UseMO.getParent(); + ++UI; + if (UseMI->isDebugValue()) { + // SSAUpdate can replace the use with an undef. That creates + // a debug instruction that is a kill. + // FIXME: Should it SSAUpdate job to delete debug instructions + // instead of replacing the use with undef? + UseMI->eraseFromParent(); + continue; + } + if (UseMI->getParent() == DefBB && !UseMI->isPHI()) + continue; + SSAUpdate.RewriteUse(UseMO); + } + } + + SSAUpdateVRs.clear(); + SSAUpdateVals.clear(); + } + + // Eliminate some of the copies inserted by tail duplication to maintain + // SSA form. + for (unsigned i = 0, e = Copies.size(); i != e; ++i) { + MachineInstr *Copy = Copies[i]; + if (!Copy->isCopy()) + continue; + unsigned Dst = Copy->getOperand(0).getReg(); + unsigned Src = Copy->getOperand(1).getReg(); + if (MRI->hasOneNonDBGUse(Src) && + MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { + // Copy is the only use. Do trivial copy propagation here. + MRI->replaceRegWith(Dst, Src); + Copy->eraseFromParent(); + } + } + + if (NewPHIs.size()) + NumAddedPHIs += NewPHIs.size(); + + return true; +} + +/// Look for small blocks that are unconditionally branched to and do not fall +/// through. Tail-duplicate their instructions into their predecessors to +/// eliminate (dynamic) branches. +bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + if (PreRegAlloc && TailDupVerify) { + DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); + VerifyPHIs(MF, true); + } + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = &*I++; + + if (NumTails == TailDupLimit) + break; + + bool IsSimple = isSimpleBB(MBB); + + if (!shouldTailDuplicate(MF, IsSimple, *MBB)) + continue; + + MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF); + } + + if (PreRegAlloc && TailDupVerify) + VerifyPHIs(MF, false); + + return MadeChange; +} + +static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { + if (UseMI.isDebugValue()) + continue; + if (UseMI.getParent() != BB) + return true; + } + return false; +} + +static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { + for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) + if (MI->getOperand(i+1).getMBB() == SrcBB) + return i; + return 0; +} + + +// Remember which registers are used by phis in this block. This is +// used to determine which registers are liveout while modifying the +// block (which is why we need to copy the information). +static void getRegsUsedByPHIs(const MachineBasicBlock &BB, + DenseSet<unsigned> *UsedByPhi) { + for (const auto &MI : BB) { + if (!MI.isPHI()) + break; + for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { + unsigned SrcReg = MI.getOperand(i).getReg(); + UsedByPhi->insert(SrcReg); + } + } +} + +/// Add a definition and source virtual registers pair for SSA update. 
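The bookkeeping below amounts to a map from each original virtual register to the (predecessor block, cloned register) pairs that now also define it, plus the order in which registers were first recorded; MachineSSAUpdater later merges those definitions. A minimal standalone model, with plain integers standing in for blocks and registers (SSAUpdateState and addEntry are made-up names):

#include <unordered_map>
#include <utility>
#include <vector>

// OrigReg -> list of (duplicating block id, new vreg) pairs.
using AvailableVals = std::vector<std::pair<int, unsigned>>;

struct SSAUpdateState {
  std::unordered_map<unsigned, AvailableVals> Vals;
  std::vector<unsigned> Order;   // registers in the order they were first seen

  void addEntry(unsigned OrigReg, unsigned NewReg, int BlockId) {
    auto It = Vals.find(OrigReg);
    if (It != Vals.end()) {
      It->second.emplace_back(BlockId, NewReg);
    } else {
      Vals[OrigReg].emplace_back(BlockId, NewReg);
      Order.push_back(OrigReg);  // remember that OrigReg needs SSA repair
    }
  }
};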
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, + MachineBasicBlock *BB) { + DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); + if (LI != SSAUpdateVals.end()) + LI->second.push_back(std::make_pair(BB, NewReg)); + else { + AvailableValsTy Vals; + Vals.push_back(std::make_pair(BB, NewReg)); + SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); + SSAUpdateVRs.push_back(OrigReg); + } +} + +/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the +/// source register that's contributed by PredBB and update SSA update map. +void TailDuplicatePass::ProcessPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap<unsigned, unsigned> &LocalVRMap, + SmallVectorImpl<std::pair<unsigned, unsigned> > &Copies, + const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { + unsigned DefReg = MI->getOperand(0).getReg(); + unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); + assert(SrcOpIdx && "Unable to find matching PHI source?"); + unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); + const TargetRegisterClass *RC = MRI->getRegClass(DefReg); + LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); + + // Insert a copy from source to the end of the block. The def register is the + // available value liveout of the block. + unsigned NewDef = MRI->createVirtualRegister(RC); + Copies.push_back(std::make_pair(NewDef, SrcReg)); + if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) + AddSSAUpdateEntry(DefReg, NewDef, PredBB); + + if (!Remove) + return; + + // Remove PredBB from the PHI node. + MI->RemoveOperand(SrcOpIdx+1); + MI->RemoveOperand(SrcOpIdx); + if (MI->getNumOperands() == 1) + MI->eraseFromParent(); +} + +/// Duplicate a TailBB instruction to PredBB and update +/// the source operands due to earlier PHI translation. +void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, + MachineBasicBlock *TailBB, + MachineBasicBlock *PredBB, + MachineFunction &MF, + DenseMap<unsigned, unsigned> &LocalVRMap, + const DenseSet<unsigned> &UsedByPhi) { + MachineInstr *NewMI = TII->duplicate(MI, MF); + for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = NewMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + if (MO.isDef()) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + unsigned NewReg = MRI->createVirtualRegister(RC); + MO.setReg(NewReg); + LocalVRMap.insert(std::make_pair(Reg, NewReg)); + if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) + AddSSAUpdateEntry(Reg, NewReg, PredBB); + } else { + DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); + if (VI != LocalVRMap.end()) { + MO.setReg(VI->second); + // Clear any kill flags from this operand. The new register could have + // uses after this one, so kills are not valid here. + MO.setIsKill(false); + MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); + } + } + } + PredBB->insert(PredBB->instr_end(), NewMI); +} + +/// After FromBB is tail duplicated into its predecessor blocks, the successors +/// have gained new predecessors. Update the PHI instructions in them +/// accordingly. 
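Stated over a toy representation, the update does two things to each PHI in a successor block: drop the incoming entry for the duplicated block once that block is dead, and append one incoming entry per predecessor that received a copy. A sketch under that simplification; ToyPhi and updatePhiAfterTailDup are illustrative, not the MachineInstr operand layout used below.

#include <algorithm>
#include <utility>
#include <vector>

// A PHI node modelled as a list of (incoming block id, incoming value) pairs.
using ToyPhi = std::vector<std::pair<int, unsigned>>;

static void updatePhiAfterTailDup(
    ToyPhi &Phi, int TailBB, bool TailIsDead,
    const std::vector<std::pair<int, unsigned>> &NewIncoming) {
  // If the tail block is gone, its incoming entry must go too.
  if (TailIsDead)
    Phi.erase(std::remove_if(Phi.begin(), Phi.end(),
                             [&](const std::pair<int, unsigned> &E) {
                               return E.first == TailBB;
                             }),
              Phi.end());

  // Each predecessor that now reaches this block contributes its own value.
  for (const auto &E : NewIncoming)
    Phi.push_back(E);
}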
+void +TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallSetVector<MachineBasicBlock*,8> &Succs) { + for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), + SE = Succs.end(); SI != SE; ++SI) { + MachineBasicBlock *SuccBB = *SI; + for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); + II != EE; ++II) { + if (!II->isPHI()) + break; + MachineInstrBuilder MIB(*FromBB->getParent(), II); + unsigned Idx = 0; + for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + Idx = i; + break; + } + } + + assert(Idx != 0); + MachineOperand &MO0 = II->getOperand(Idx); + unsigned Reg = MO0.getReg(); + if (isDead) { + // Folded into the previous BB. + // There could be duplicate phi source entries. FIXME: Should sdisel + // or earlier pass fixed this? + for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { + MachineOperand &MO = II->getOperand(i+1); + if (MO.getMBB() == FromBB) { + II->RemoveOperand(i+1); + II->RemoveOperand(i); + } + } + } else + Idx = 0; + + // If Idx is set, the operands at Idx and Idx+1 must be removed. + // We reuse the location to avoid expensive RemoveOperand calls. + + DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); + if (LI != SSAUpdateVals.end()) { + // This register is defined in the tail block. + for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = LI->second[j].first; + // If we didn't duplicate a bb into a particular predecessor, we + // might still have added an entry to SSAUpdateVals to correcly + // recompute SSA. If that case, avoid adding a dummy extra argument + // this PHI. + if (!SrcBB->isSuccessor(SuccBB)) + continue; + + unsigned SrcReg = LI->second[j].second; + if (Idx != 0) { + II->getOperand(Idx).setReg(SrcReg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + MIB.addReg(SrcReg).addMBB(SrcBB); + } + } + } else { + // Live in tail block, must also be live in predecessors. + for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { + MachineBasicBlock *SrcBB = TDBBs[j]; + if (Idx != 0) { + II->getOperand(Idx).setReg(Reg); + II->getOperand(Idx+1).setMBB(SrcBB); + Idx = 0; + } else { + MIB.addReg(Reg).addMBB(SrcBB); + } + } + } + if (Idx != 0) { + II->RemoveOperand(Idx+1); + II->RemoveOperand(Idx); + } + } + } +} + +/// Determine if it is profitable to duplicate this block. +bool +TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, + bool IsSimple, + MachineBasicBlock &TailBB) { + // Only duplicate blocks that end with unconditional branches. + if (TailBB.canFallThrough()) + return false; + + // Don't try to tail-duplicate single-block loops. + if (TailBB.isSuccessor(&TailBB)) + return false; + + // Set the limit on the cost to duplicate. When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. + unsigned MaxDuplicateCount; + if (TailDuplicateSize.getNumOccurrences() == 0 && + // FIXME: Use Function::optForSize(). + MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) + MaxDuplicateCount = 1; + else + MaxDuplicateCount = TailDuplicateSize; + + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. 
The limit needs to be high enough + // to allow undoing the effects of tail merging and other optimizations + // that rearrange the predecessors of the indirect branch. + + bool HasIndirectbr = false; + if (!TailBB.empty()) + HasIndirectbr = TailBB.back().isIndirectBranch(); + + if (HasIndirectbr && PreRegAlloc) + MaxDuplicateCount = 20; + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned InstrCount = 0; + for (MachineInstr &MI : TailBB) { + // Non-duplicable things shouldn't be tail-duplicated. + if (MI.isNotDuplicable()) + return false; + + // Do not duplicate 'return' instructions if this is a pre-regalloc run. + // A return may expand into a lot more instructions (e.g. reload of callee + // saved registers) after PEI. + if (PreRegAlloc && MI.isReturn()) + return false; + + // Avoid duplicating calls before register allocation. Calls presents a + // barrier to register allocation so duplicating them may end up increasing + // spills. + if (PreRegAlloc && MI.isCall()) + return false; + + if (!MI.isPHI() && !MI.isDebugValue()) + InstrCount += 1; + + if (InstrCount > MaxDuplicateCount) + return false; + } + + // Check if any of the successors of TailBB has a PHI node in which the + // value corresponding to TailBB uses a subregister. + // If a phi node uses a register paired with a subregister, the actual + // "value type" of the phi may differ from the type of the register without + // any subregisters. Due to a bug, tail duplication may add a new operand + // without a necessary subregister, producing an invalid code. This is + // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. + // Disable tail duplication for this case for now, until the problem is + // fixed. + for (auto SB : TailBB.successors()) { + for (auto &I : *SB) { + if (!I.isPHI()) + break; + unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); + assert(Idx != 0); + MachineOperand &PU = I.getOperand(Idx); + if (PU.getSubReg() != 0) + return false; + } + } + + if (HasIndirectbr && PreRegAlloc) + return true; + + if (IsSimple) + return true; + + if (!PreRegAlloc) + return true; + + return canCompletelyDuplicateBB(TailBB); +} + +/// True if this BB has only one unconditional jump. 
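A block counts as "simple" when it has exactly one successor, at least one predecessor, and contains nothing but debug values and, at most, one unconditional branch. The same test expressed over an illustrative toy block type (not the MachineBasicBlock API):

#include <vector>

struct ToyInstr { bool IsDebugValue; bool IsUncondBranch; };
struct ToyBlock {
  unsigned NumSuccs, NumPreds;
  std::vector<ToyInstr> Instrs;
};

static bool isSimpleBlock(const ToyBlock &B) {
  if (B.NumSuccs != 1 || B.NumPreds == 0)
    return false;
  for (const ToyInstr &I : B.Instrs) {
    if (I.IsDebugValue)
      continue;                    // debug values do not count
    return I.IsUncondBranch;       // the first real instruction decides
  }
  return true;                     // empty apart from debug values
}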
+bool +TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) { + if (TailBB->succ_size() != 1) + return false; + if (TailBB->pred_empty()) + return false; + MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); + if (I == TailBB->end()) + return true; + return I->isUnconditionalBranch(); +} + +static bool +bothUsedInPHI(const MachineBasicBlock &A, + SmallPtrSet<MachineBasicBlock*, 8> SuccsB) { + for (MachineBasicBlock *BB : A.successors()) + if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) + return true; + + return false; +} + +bool +TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { + for (MachineBasicBlock *PredBB : BB.predecessors()) { + if (PredBB->succ_size() > 1) + return false; + + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + return false; + + if (!PredCond.empty()) + return false; + } + return true; +} + +bool +TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + const DenseSet<unsigned> &UsedByPhi, + SmallVectorImpl<MachineInstr *> &Copies) { + SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(), + TailBB->succ_end()); + SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + bool Changed = false; + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + if (PredBB->hasEHPadSuccessor()) + continue; + + if (bothUsedInPHI(*PredBB, Succs)) + continue; + + MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + + Changed = true; + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From simple Succ: " << *TailBB); + + MachineBasicBlock *NewTarget = *TailBB->succ_begin(); + MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); + + // Make PredFBB explicit. + if (PredCond.empty()) + PredFBB = PredTBB; + + // Make fall through explicit. + if (!PredTBB) + PredTBB = NextBB; + if (!PredFBB) + PredFBB = NextBB; + + // Redirect + if (PredFBB == TailBB) + PredFBB = NewTarget; + if (PredTBB == TailBB) + PredTBB = NewTarget; + + // Make the branch unconditional if possible + if (PredTBB == PredFBB) { + PredCond.clear(); + PredFBB = nullptr; + } + + // Avoid adding fall through branches. + if (PredFBB == NextBB) + PredFBB = nullptr; + if (PredTBB == NextBB && PredFBB == nullptr) + PredTBB = nullptr; + + TII->RemoveBranch(*PredBB); + + if (PredTBB) + TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); + + if (!PredBB->isSuccessor(NewTarget)) + PredBB->replaceSuccessor(TailBB, NewTarget); + else { + PredBB->removeSuccessor(TailBB, true); + assert(PredBB->succ_size() <= 1); + } + + TDBBs.push_back(PredBB); + } + return Changed; +} + +/// If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. 
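Conceptually, the transformation copies the tail block's body into each predecessor that branches unconditionally to it and rewires that predecessor straight to the tail's successors; the real pass additionally rewrites PHIs, virtual registers and live-ins. A bare-bones sketch over a toy CFG (ToyBB and duplicateTailInto are made-up names for illustration):

#include <string>
#include <vector>

struct ToyBB {
  std::vector<std::string> Body;   // instructions, minus the terminator
  std::vector<ToyBB *> Succs;
};

// Duplicate Tail into Pred, assuming Pred's only successor is Tail and its
// unconditional branch to Tail has already been removed.
static void duplicateTailInto(ToyBB &Pred, const ToyBB &Tail) {
  // Clone the tail's (non-terminator) instructions at the end of Pred.
  Pred.Body.insert(Pred.Body.end(), Tail.Body.begin(), Tail.Body.end());
  // Pred now ends where Tail ended: it takes over Tail's successors.
  Pred.Succs = Tail.Succs;
}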
+bool +TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, + bool IsSimple, + MachineFunction &MF, + SmallVectorImpl<MachineBasicBlock *> &TDBBs, + SmallVectorImpl<MachineInstr *> &Copies) { + DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); + + DenseSet<unsigned> UsedByPhi; + getRegsUsedByPHIs(*TailBB, &UsedByPhi); + + if (IsSimple) + return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. + bool Changed = false; + SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() > 1) + continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector<MachineOperand, 4> PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + TDBBs.push_back(PredBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + + if (RS && !TailBB->livein_empty()) { + // Update PredBB livein. + RS->enterBasicBlock(PredBB); + if (!PredBB->empty()) + RS->forward(std::prev(PredBB->end())); + for (const auto &LI : TailBB->liveins()) { + if (!RS->isRegUsed(LI.PhysReg, false)) + // If a register is previously livein to the tail but it's not live + // at the end of predecessor BB, then it should be added to its + // livein list. + PredBB->addLiveIn(LI); + } + } + + // Clone the contents of TailBB into PredBB. + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + // Use instr_iterator here to properly handle bundles, e.g. + // ARM Thumb2 IT block. + MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); + while (I != TailBB->instr_end()) { + MachineInstr *MI = &*I; + ++I; + if (MI->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); + } else { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); + } + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + + // Simplify + TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); + + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. 
+ PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. + MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); + MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; + SmallVector<MachineOperand, 4> PriorCond; + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (PrevBB->succ_size() == 1 && + !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && + PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(dbgs() << "\nMerging into block: " << *PrevBB + << "From MBB: " << *TailBB); + if (PreRegAlloc) { + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); + if (MI->getParent()) + MI->eraseFromParent(); + } + + // Now copy the non-PHI instructions. + while (I != TailBB->end()) { + // Replace def of virtual registers with new registers, and update + // uses with PHI source register or the new registers. + MachineInstr *MI = &*I++; + assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); + DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); + MI->eraseFromParent(); + } + MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); + } + } else { + // No PHIs to worry about, just splice the instructions over. + PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); + } + PrevBB->removeSuccessor(PrevBB->succ_begin()); + assert(PrevBB->succ_empty()); + PrevBB->transferSuccessors(TailBB); + TDBBs.push_back(PrevBB); + Changed = true; + } + + // If this is after register allocation, there are no phis to fix. + if (!PreRegAlloc) + return Changed; + + // If we made no changes so far, we are safe. + if (!Changed) + return Changed; + + + // Handle the nasty case in that we duplicated a block that is part of a loop + // into some but not all of its predecessors. For example: + // 1 -> 2 <-> 3 | + // \ | + // \---> rest | + // if we duplicate 2 into 1 but not into 3, we end up with + // 12 -> 3 <-> 2 -> rest | + // \ / | + // \----->-----/ | + // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced + // with a phi in 3 (which now dominates 2). + // What we do here is introduce a copy in 3 of the register defined by the + // phi, just like when we are duplicating 2 into 3, but we don't copy any + // real instructions or remove the 3 -> 2 edge from the phi in 2. 
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) + continue; + + // EH edges + if (PredBB->succ_size() != 1) + continue; + + DenseMap<unsigned, unsigned> LocalVRMap; + SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; + MachineBasicBlock::iterator I = TailBB->begin(); + // Process PHI instructions first. + while (I != TailBB->end() && I->isPHI()) { + // Replace the uses of the def of the PHI with the register coming + // from PredBB. + MachineInstr *MI = &*I++; + ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); + } + MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); + for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); + } + } + + return Changed; +} + +/// Remove the specified dead machine basic block from the function, updating +/// the CFG. +void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); + + // Remove all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // Remove the block. + MBB->eraseFromParent(); +} diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp new file mode 100644 index 0000000..679ade1 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -0,0 +1,94 @@ +//===----- TargetFrameLoweringImpl.cpp - Implement target frame interface --==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implements the layout of a stack frame on the target machine. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Function.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cstdlib> +using namespace llvm; + +TargetFrameLowering::~TargetFrameLowering() { +} + +/// The default implementation just looks at attribute "no-frame-pointer-elim". +bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const { + auto Attr = MF.getFunction()->getFnAttribute("no-frame-pointer-elim"); + return Attr.getValueAsString() == "true"; +} + +/// Returns the displacement from the frame register to the stack +/// frame of the specified index, along with the frame register used +/// (in output arg FrameReg). This is the default implementation which +/// is overridden for some targets. +int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. 
The target can override this if it's doing + // something different. + FrameReg = RI->getFrameRegister(MF); + + return MFI->getObjectOffset(FI) + MFI->getStackSize() - + getOffsetOfLocalArea() + MFI->getOffsetAdjustment(); +} + +bool TargetFrameLowering::needsFrameIndexResolution( + const MachineFunction &MF) const { + return MF.getFrameInfo()->hasStackObjects(); +} + +void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF, + BitVector &SavedRegs, + RegScavenger *RS) const { + // Get the callee saved register list... + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); + + // Early exit if there are no callee saved registers. + if (!CSRegs || CSRegs[0] == 0) + return; + + SavedRegs.resize(TRI.getNumRegs()); + + // In Naked functions we aren't going to save any registers. + if (MF.getFunction()->hasFnAttribute(Attribute::Naked)) + return; + + // Functions which call __builtin_unwind_init get all their registers saved. + bool CallsUnwindInit = MF.getMMI().callsUnwindInit(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + for (unsigned i = 0; CSRegs[i]; ++i) { + unsigned Reg = CSRegs[i]; + if (CallsUnwindInit || MRI.isPhysRegModified(Reg)) + SavedRegs.set(Reg); + } +} + +unsigned TargetFrameLowering::getStackAlignmentSkew( + const MachineFunction &MF) const { + // When HHVM function is called, the stack is skewed as the return address + // is removed from the stack before we enter the function. + if (LLVM_UNLIKELY(MF.getFunction()->getCallingConv() == CallingConv::HHVM)) + return MF.getTarget().getPointerSize(); + + return 0; +} diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp new file mode 100644 index 0000000..6eaf991 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -0,0 +1,1210 @@ +//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetInstrInfo class. 
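// Illustrative aside: a standalone sketch of the default frame-index offset
// arithmetic in getFrameIndexReference above, with made-up frame values (not
// the MachineFrameInfo API).
#include <cstdio>

// Mirrors: ObjectOffset(FI) + StackSize - OffsetOfLocalArea + OffsetAdjustment.
static int frameIndexOffset(int ObjectOffset, int StackSize,
                            int OffsetOfLocalArea, int OffsetAdjustment) {
  return ObjectOffset + StackSize - OffsetOfLocalArea + OffsetAdjustment;
}

int main() {
  // e.g. an object at offset -8 in a 64-byte frame, no local-area skew:
  std::printf("frame-register-relative offset = %d\n",
              frameIndexOffset(-8, 64, 0, 0)); // prints 56
}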
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include <cctype> +using namespace llvm; + +static cl::opt<bool> DisableHazardRecognizer( + "disable-sched-hazard", cl::Hidden, cl::init(false), + cl::desc("Disable hazard detection during preRA scheduling")); + +TargetInstrInfo::~TargetInstrInfo() { +} + +const TargetRegisterClass* +TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const { + if (OpNum >= MCID.getNumOperands()) + return nullptr; + + short RegClass = MCID.OpInfo[OpNum].RegClass; + if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) + return TRI->getPointerRegClass(MF, RegClass); + + // Instructions like INSERT_SUBREG do not have fixed register classes. + if (RegClass < 0) + return nullptr; + + // Otherwise just look it up normally. + return TRI->getRegClass(RegClass); +} + +/// insertNoop - Insert a noop into the instruction stream at the specified +/// point. +void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + llvm_unreachable("Target didn't implement insertNoop!"); +} + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. +/// Variable-length instructions are not handled here; this function +/// may be overloaded in the target code to do that. +unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + + + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) + atInsnStart = true; + if (atInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + return Length; +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void +TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + + // Remove all the old successors of MBB from the CFG. 
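// Illustrative aside: a simplified standalone version of the inline-asm length
// estimate in getInlineAsmLength above, assuming ";" as the separator string
// and "#" as the comment string (the real code reads both from MCAsmInfo).
#include <cctype>
#include <iostream>

static unsigned approximateAsmLength(const char *Str, unsigned MaxInstLength) {
  bool AtInsnStart = true;
  unsigned Length = 0;
  for (; *Str; ++Str) {
    if (*Str == '\n' || *Str == ';')     // instruction separators
      AtInsnStart = true;
    else if (AtInsnStart && *Str == '#') // comment runs to the next separator
      AtInsnStart = false;
    else if (AtInsnStart &&
             !std::isspace(static_cast<unsigned char>(*Str))) {
      Length += MaxInstLength;           // assume worst-case encoding
      AtInsnStart = false;
    }
  }
  return Length;
}

int main() {
  // Two instructions and one comment line; with a 4-byte max length this is 8.
  std::cout << approximateAsmLength("mov r0, r1\n# comment\nadd r0, r0, r1\n", 4)
            << "\n";
}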
+ while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_begin()); + + // Remove all the dead instructions from the end of MBB. + MBB->erase(Tail, MBB->end()); + + // If MBB isn't immediately before MBB, insert a branch to it. + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) + InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), + Tail->getDebugLoc()); + MBB->addSuccessor(NewDest); +} + +MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr *MI, + bool NewMI, + unsigned Idx1, + unsigned Idx2) const { + const MCInstrDesc &MCID = MI->getDesc(); + bool HasDef = MCID.getNumDefs(); + if (HasDef && !MI->getOperand(0).isReg()) + // No idea how to commute this instruction. Target should implement its own. + return nullptr; + + unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1; + unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2; + assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) && + CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 && + "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands."); + assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && + "This only knows how to commute register operands so far"); + + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; + unsigned Reg1 = MI->getOperand(Idx1).getReg(); + unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); + bool Reg1IsKill = MI->getOperand(Idx1).isKill(); + bool Reg2IsKill = MI->getOperand(Idx2).isKill(); + bool Reg1IsUndef = MI->getOperand(Idx1).isUndef(); + bool Reg2IsUndef = MI->getOperand(Idx2).isUndef(); + bool Reg1IsInternal = MI->getOperand(Idx1).isInternalRead(); + bool Reg2IsInternal = MI->getOperand(Idx2).isInternalRead(); + // If destination is tied to either of the commuted source register, then + // it must be updated. + if (HasDef && Reg0 == Reg1 && + MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { + Reg2IsKill = false; + Reg0 = Reg2; + SubReg0 = SubReg2; + } else if (HasDef && Reg0 == Reg2 && + MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { + Reg1IsKill = false; + Reg0 = Reg1; + SubReg0 = SubReg1; + } + + if (NewMI) { + // Create a new instruction. + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + } + + if (HasDef) { + MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } + MI->getOperand(Idx2).setReg(Reg1); + MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); + MI->getOperand(Idx2).setIsKill(Reg1IsKill); + MI->getOperand(Idx1).setIsKill(Reg2IsKill); + MI->getOperand(Idx2).setIsUndef(Reg1IsUndef); + MI->getOperand(Idx1).setIsUndef(Reg2IsUndef); + MI->getOperand(Idx2).setIsInternalRead(Reg1IsInternal); + MI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal); + return MI; +} + +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI, + unsigned OpIdx1, + unsigned OpIdx2) const { + // If OpIdx1 or OpIdx2 is not specified, then this method is free to choose + // any commutable operand, which is done in findCommutedOpIndices() method + // called below. 
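// Illustrative aside: a minimal standalone sketch of the operand swap done by
// commuteInstructionImpl above, using plain register numbers (kill/undef flag
// and sub-register bookkeeping omitted; the tied-def check is simplified to a
// same-register comparison).
#include <cassert>
#include <utility>
#include <vector>

// Regs[0] is the def; Idx1/Idx2 are the commutable sources. If the def names
// the same register as one source (a tied def), retarget it at the other
// source so the tied pair still matches after the swap.
static void commuteRegs(std::vector<unsigned> &Regs, unsigned Idx1,
                        unsigned Idx2) {
  if (Regs[0] == Regs[Idx1])
    Regs[0] = Regs[Idx2];
  else if (Regs[0] == Regs[Idx2])
    Regs[0] = Regs[Idx1];
  std::swap(Regs[Idx1], Regs[Idx2]);
}

int main() {
  std::vector<unsigned> Regs = {7, 7, 9}; // %7 = op %7, %9 (def tied to src 1)
  commuteRegs(Regs, 1, 2);
  assert(Regs[0] == 9 && Regs[1] == 9 && Regs[2] == 7); // %9 = op %9, %7
  return 0;
}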
+ if ((OpIdx1 == CommuteAnyOperandIndex || OpIdx2 == CommuteAnyOperandIndex) && + !findCommutedOpIndices(MI, OpIdx1, OpIdx2)) { + assert(MI->isCommutable() && + "Precondition violation: MI must be commutable."); + return nullptr; + } + return commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); +} + +bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1, + unsigned &ResultIdx2, + unsigned CommutableOpIdx1, + unsigned CommutableOpIdx2) { + if (ResultIdx1 == CommuteAnyOperandIndex && + ResultIdx2 == CommuteAnyOperandIndex) { + ResultIdx1 = CommutableOpIdx1; + ResultIdx2 = CommutableOpIdx2; + } else if (ResultIdx1 == CommuteAnyOperandIndex) { + if (ResultIdx2 == CommutableOpIdx1) + ResultIdx1 = CommutableOpIdx2; + else if (ResultIdx2 == CommutableOpIdx2) + ResultIdx1 = CommutableOpIdx1; + else + return false; + } else if (ResultIdx2 == CommuteAnyOperandIndex) { + if (ResultIdx1 == CommutableOpIdx1) + ResultIdx2 = CommutableOpIdx2; + else if (ResultIdx1 == CommutableOpIdx2) + ResultIdx2 = CommutableOpIdx1; + else + return false; + } else + // Check that the result operand indices match the given commutable + // operand indices. + return (ResultIdx1 == CommutableOpIdx1 && ResultIdx2 == CommutableOpIdx2) || + (ResultIdx1 == CommutableOpIdx2 && ResultIdx2 == CommutableOpIdx1); + + return true; +} + +bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); + + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isCommutable()) + return false; + + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this + // is not true, then the target must implement this. + unsigned CommutableOpIdx1 = MCID.getNumDefs(); + unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1; + if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, + CommutableOpIdx1, CommutableOpIdx2)) + return false; + + if (!MI->getOperand(SrcOpIdx1).isReg() || + !MI->getOperand(SrcOpIdx2).isReg()) + // No idea. + return false; + return true; +} + +bool +TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
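// Illustrative aside: a standalone sketch of the index-resolution rules in
// fixCommutedOpIndices below, with ~0u standing in for CommuteAnyOperandIndex
// (illustrative only, not the TargetInstrInfo method).
#include <cassert>

static const unsigned AnyIndex = ~0u;

static bool fixIndices(unsigned &Res1, unsigned &Res2,
                       unsigned Comm1, unsigned Comm2) {
  if (Res1 == AnyIndex && Res2 == AnyIndex) {
    Res1 = Comm1;                    // caller accepts any commutable pair
    Res2 = Comm2;
  } else if (Res1 == AnyIndex) {     // one side fixed: pick its partner
    if (Res2 == Comm1)      Res1 = Comm2;
    else if (Res2 == Comm2) Res1 = Comm1;
    else                    return false;
  } else if (Res2 == AnyIndex) {
    if (Res1 == Comm1)      Res2 = Comm2;
    else if (Res1 == Comm2) Res2 = Comm1;
    else                    return false;
  } else {                           // both fixed: must match the commutable pair
    return (Res1 == Comm1 && Res2 == Comm2) ||
           (Res1 == Comm2 && Res2 == Comm1);
  }
  return true;
}

int main() {
  unsigned A = AnyIndex, B = 2;
  assert(fixIndices(A, B, 1, 2) && A == 1 && B == 2);
  unsigned C = 3, D = 1;
  assert(!fixIndices(C, D, 1, 2)); // operand 3 is not commutable here
  return 0;
}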
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + +bool TargetInstrInfo::PredicateInstruction( + MachineInstr *MI, ArrayRef<MachineOperand> Pred) const { + bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfo::PredicateInstruction() can't handle bundles"); + + const MCInstrDesc &MCID = MI->getDesc(); + if (!MI->isPredicable()) + return false; + + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MCID.OpInfo[i].isPredicate()) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + MO.setReg(Pred[j].getReg()); + MadeChange = true; + } else if (MO.isImm()) { + MO.setImm(Pred[j].getImm()); + MadeChange = true; + } else if (MO.isMBB()) { + MO.setMBB(Pred[j].getMBB()); + MadeChange = true; + } + ++j; + } + } + return MadeChange; +} + +bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad()) { + if (const FixedStackPseudoSourceValue *Value = + dyn_cast_or_null<FixedStackPseudoSourceValue>( + (*o)->getPseudoValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + } + return false; +} + +bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore()) { + if (const FixedStackPseudoSourceValue *Value = + dyn_cast_or_null<FixedStackPseudoSourceValue>( + (*o)->getPseudoValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + } + return false; +} + +bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, + unsigned SubIdx, unsigned &Size, + unsigned &Offset, + const MachineFunction &MF) const { + if (!SubIdx) { + Size = RC->getSize(); + Offset = 0; + return true; + } + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + unsigned BitSize = TRI->getSubRegIdxSize(SubIdx); + // Convert bit size to byte size to be consistent with + // MCRegisterClass::getSize(). 
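// Illustrative aside: a standalone sketch of the sub-register spill-slot
// computation in getStackSlotRange above, using plain integers for the
// register and sub-register geometry (illustrative values only).
#include <cstdio>

static bool stackSlotRange(unsigned RegBytes, unsigned SubBits, int SubBitOff,
                           bool LittleEndian, unsigned &Size,
                           unsigned &Offset) {
  if (SubBits % 8 || SubBitOff < 0 || SubBitOff % 8)
    return false;                // only byte-sized, byte-aligned pieces work
  Size = SubBits / 8;
  Offset = (unsigned)SubBitOff / 8;
  if (!LittleEndian)             // big-endian: count from the other end
    Offset = RegBytes - (Offset + Size);
  return true;
}

int main() {
  unsigned Size, Offset;
  // A 32-bit sub-register starting at bit 32 of a 16-byte register.
  if (stackSlotRange(16, 32, 32, /*LittleEndian=*/false, Size, Offset))
    std::printf("size=%u offset=%u\n", Size, Offset); // size=4 offset=8
}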
+ if (BitSize % 8) + return false; + + int BitOffset = TRI->getSubRegIdxOffset(SubIdx); + if (BitOffset < 0 || BitOffset % 8) + return false; + + Size = BitSize /= 8; + Offset = (unsigned)BitOffset / 8; + + assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + + if (!MF.getDataLayout().isLittleEndian()) { + Offset = RC->getSize() - (Offset + Size); + } + return true; +} + +void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); + MBB.insert(I, MI); +} + +bool +TargetInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + +MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { + assert(!Orig->isNotDuplicable() && + "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(Orig); +} + +// If the COPY instruction in MI can be folded to a stack operation, return +// the register class to use. +static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, + unsigned FoldIdx) { + assert(MI->isCopy() && "MI must be a COPY instruction"); + if (MI->getNumOperands() != 2) + return nullptr; + assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); + + const MachineOperand &FoldOp = MI->getOperand(FoldIdx); + const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); + + if (FoldOp.getSubReg() || LiveOp.getSubReg()) + return nullptr; + + unsigned FoldReg = FoldOp.getReg(); + unsigned LiveReg = LiveOp.getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && + "Cannot fold physregs"); + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); + + if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + return RC->contains(LiveOp.getReg()) ? RC : nullptr; + + if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) + return RC; + + // FIXME: Allow folding when register classes are memory compatible. + return nullptr; +} + +void TargetInstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { + llvm_unreachable("Not a MachO target"); +} + +static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr *MI, + ArrayRef<unsigned> Ops, int FrameIndex, + const TargetInstrInfo &TII) { + unsigned StartIdx = 0; + switch (MI->getOpcode()) { + case TargetOpcode::STACKMAP: + StartIdx = 2; // Skip ID, nShadowBytes. + break; + case TargetOpcode::PATCHPOINT: { + // For PatchPoint, the call args are not foldable. + PatchPointOpers opers(MI); + StartIdx = opers.getVarIdx(); + break; + } + default: + llvm_unreachable("unexpected stackmap opcode"); + } + + // Return false if any operands requested for folding are not foldable (not + // part of the stackmap's live values). 
+ for (unsigned Op : Ops) { + if (Op < StartIdx) + return nullptr; + } + + MachineInstr *NewMI = + MF.CreateMachineInstr(TII.get(MI->getOpcode()), MI->getDebugLoc(), true); + MachineInstrBuilder MIB(MF, NewMI); + + // No need to fold return, the meta data, and function arguments + for (unsigned i = 0; i < StartIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + + for (unsigned i = StartIdx; i < MI->getNumOperands(); ++i) { + MachineOperand &MO = MI->getOperand(i); + if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) { + unsigned SpillSize; + unsigned SpillOffset; + // Compute the spill slot size and offset. + const TargetRegisterClass *RC = + MF.getRegInfo().getRegClass(MO.getReg()); + bool Valid = + TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF); + if (!Valid) + report_fatal_error("cannot spill patchpoint subregister operand"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(SpillSize); + MIB.addFrameIndex(FrameIndex); + MIB.addImm(SpillOffset); + } + else + MIB.addOperand(MO); + } + return NewMI; +} + +/// foldMemoryOperand - Attempt to fold a load or store of the specified stack +/// slot into the specified machine instruction for the specified operand(s). +/// If this is possible, a new instruction is returned with the specified +/// operand folded, otherwise NULL is returned. The client is responsible for +/// removing the old instruction and adding the new one in the instruction +/// stream. +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + int FI) const { + unsigned Flags = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (MI->getOperand(Ops[i]).isDef()) + Flags |= MachineMemOperand::MOStore; + else + Flags |= MachineMemOperand::MOLoad; + + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + + MachineInstr *NewMI = nullptr; + + if (MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FI, *this); + if (NewMI) + MBB->insert(MI, NewMI); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI); + } + + if (NewMI) { + NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); + + return NewMI; + } + + // Straight COPY may fold as load/store. 
+ if (!MI->isCopy() || Ops.size() != 1) + return nullptr; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return nullptr; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; +} + +bool TargetInstrInfo::hasReassociableOperands( + const MachineInstr &Inst, const MachineBasicBlock *MBB) const { + const MachineOperand &Op1 = Inst.getOperand(1); + const MachineOperand &Op2 = Inst.getOperand(2); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + + // We need virtual register definitions for the operands that we will + // reassociate. + MachineInstr *MI1 = nullptr; + MachineInstr *MI2 = nullptr; + if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg())) + MI1 = MRI.getUniqueVRegDef(Op1.getReg()); + if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg())) + MI2 = MRI.getUniqueVRegDef(Op2.getReg()); + + // And they need to be in the trace (otherwise, they won't have a depth). + return MI1 && MI2 && MI1->getParent() == MBB && MI2->getParent() == MBB; +} + +bool TargetInstrInfo::hasReassociableSibling(const MachineInstr &Inst, + bool &Commuted) const { + const MachineBasicBlock *MBB = Inst.getParent(); + const MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineInstr *MI1 = MRI.getUniqueVRegDef(Inst.getOperand(1).getReg()); + MachineInstr *MI2 = MRI.getUniqueVRegDef(Inst.getOperand(2).getReg()); + unsigned AssocOpcode = Inst.getOpcode(); + + // If only one operand has the same opcode and it's the second source operand, + // the operands must be commuted. + Commuted = MI1->getOpcode() != AssocOpcode && MI2->getOpcode() == AssocOpcode; + if (Commuted) + std::swap(MI1, MI2); + + // 1. The previous instruction must be the same type as Inst. + // 2. The previous instruction must have virtual register definitions for its + // operands in the same basic block as Inst. + // 3. The previous instruction's result must only be used by Inst. + return MI1->getOpcode() == AssocOpcode && + hasReassociableOperands(*MI1, MBB) && + MRI.hasOneNonDBGUse(MI1->getOperand(0).getReg()); +} + +// 1. The operation must be associative and commutative. +// 2. The instruction must have virtual register definitions for its +// operands in the same basic block. +// 3. The instruction must have a reassociable sibling. +bool TargetInstrInfo::isReassociationCandidate(const MachineInstr &Inst, + bool &Commuted) const { + return isAssociativeAndCommutative(Inst) && + hasReassociableOperands(Inst, Inst.getParent()) && + hasReassociableSibling(Inst, Commuted); +} + +// The concept of the reassociation pass is that these operations can benefit +// from this kind of transformation: +// +// A = ? op ? +// B = A op X (Prev) +// C = B op Y (Root) +// --> +// A = ? op ? +// B = X op Y +// C = A op B +// +// breaking the dependency between A and B, allowing them to be executed in +// parallel (or back-to-back in a pipeline) instead of depending on each other. + +// FIXME: This has the potential to be expensive (compile time) while not +// improving the code at all. Some ways to limit the overhead: +// 1. Track successful transforms; bail out if hit rate gets too low. +// 2. Only enable at -O3 or some other non-default optimization level. +// 3. 
Pre-screen pattern candidates here: if an operand of the previous +// instruction is known to not increase the critical path, then don't match +// that pattern. +bool TargetInstrInfo::getMachineCombinerPatterns( + MachineInstr &Root, + SmallVectorImpl<MachineCombinerPattern> &Patterns) const { + + bool Commute; + if (isReassociationCandidate(Root, Commute)) { + // We found a sequence of instructions that may be suitable for a + // reassociation of operands to increase ILP. Specify each commutation + // possibility for the Prev instruction in the sequence and let the + // machine combiner decide if changing the operands is worthwhile. + if (Commute) { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_YB); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_YB); + } else { + Patterns.push_back(MachineCombinerPattern::REASSOC_AX_BY); + Patterns.push_back(MachineCombinerPattern::REASSOC_XA_BY); + } + return true; + } + + return false; +} + +/// Attempt the reassociation transformation to reduce critical path length. +/// See the above comments before getMachineCombinerPatterns(). +void TargetInstrInfo::reassociateOps( + MachineInstr &Root, MachineInstr &Prev, + MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const { + MachineFunction *MF = Root.getParent()->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); + const TargetRegisterClass *RC = Root.getRegClassConstraint(0, TII, TRI); + + // This array encodes the operand index for each parameter because the + // operands may be commuted. Each row corresponds to a pattern value, + // and each column specifies the index of A, B, X, Y. + unsigned OpIdx[4][4] = { + { 1, 1, 2, 2 }, + { 1, 2, 2, 1 }, + { 2, 1, 1, 2 }, + { 2, 2, 1, 1 } + }; + + int Row; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: Row = 0; break; + case MachineCombinerPattern::REASSOC_AX_YB: Row = 1; break; + case MachineCombinerPattern::REASSOC_XA_BY: Row = 2; break; + case MachineCombinerPattern::REASSOC_XA_YB: Row = 3; break; + default: llvm_unreachable("unexpected MachineCombinerPattern"); + } + + MachineOperand &OpA = Prev.getOperand(OpIdx[Row][0]); + MachineOperand &OpB = Root.getOperand(OpIdx[Row][1]); + MachineOperand &OpX = Prev.getOperand(OpIdx[Row][2]); + MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]); + MachineOperand &OpC = Root.getOperand(0); + + unsigned RegA = OpA.getReg(); + unsigned RegB = OpB.getReg(); + unsigned RegX = OpX.getReg(); + unsigned RegY = OpY.getReg(); + unsigned RegC = OpC.getReg(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) + MRI.constrainRegClass(RegA, RC); + if (TargetRegisterInfo::isVirtualRegister(RegB)) + MRI.constrainRegClass(RegB, RC); + if (TargetRegisterInfo::isVirtualRegister(RegX)) + MRI.constrainRegClass(RegX, RC); + if (TargetRegisterInfo::isVirtualRegister(RegY)) + MRI.constrainRegClass(RegY, RC); + if (TargetRegisterInfo::isVirtualRegister(RegC)) + MRI.constrainRegClass(RegC, RC); + + // Create a new virtual register for the result of (X op Y) instead of + // recycling RegB because the MachineCombiner's computation of the critical + // path requires a new register definition rather than an existing one. 
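// Illustrative aside: a standalone sketch of the reassociation above. For an
// associative and commutative op, (A op X) op Y can be rewritten as
// A op (X op Y), so the new (X op Y) value no longer depends on A and the two
// halves can execute in parallel (shown with integer addition).
#include <cassert>

int main() {
  int A = 5, X = 7, Y = 11;
  int B = A + X;     // Prev:  B = A op X
  int C = B + Y;     // Root:  C = B op Y   (serial chain through A)
  int NewVR = X + Y; // rewritten Prev: independent of A
  int C2 = A + NewVR; // rewritten Root
  assert(C == C2);   // same value, shorter critical path through A
  return 0;
}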
+ unsigned NewVR = MRI.createVirtualRegister(RC); + InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); + + unsigned Opcode = Root.getOpcode(); + bool KillA = OpA.isKill(); + bool KillX = OpX.isKill(); + bool KillY = OpY.isKill(); + + // Create new instructions for insertion. + MachineInstrBuilder MIB1 = + BuildMI(*MF, Prev.getDebugLoc(), TII->get(Opcode), NewVR) + .addReg(RegX, getKillRegState(KillX)) + .addReg(RegY, getKillRegState(KillY)); + MachineInstrBuilder MIB2 = + BuildMI(*MF, Root.getDebugLoc(), TII->get(Opcode), RegC) + .addReg(RegA, getKillRegState(KillA)) + .addReg(NewVR, getKillRegState(true)); + + setSpecialOperandAttr(Root, Prev, *MIB1, *MIB2); + + // Record new instructions for insertion and old instructions for deletion. + InsInstrs.push_back(MIB1); + InsInstrs.push_back(MIB2); + DelInstrs.push_back(&Prev); + DelInstrs.push_back(&Root); +} + +void TargetInstrInfo::genAlternativeCodeSequence( + MachineInstr &Root, MachineCombinerPattern Pattern, + SmallVectorImpl<MachineInstr *> &InsInstrs, + SmallVectorImpl<MachineInstr *> &DelInstrs, + DenseMap<unsigned, unsigned> &InstIdxForVirtReg) const { + MachineRegisterInfo &MRI = Root.getParent()->getParent()->getRegInfo(); + + // Select the previous instruction in the sequence based on the input pattern. + MachineInstr *Prev = nullptr; + switch (Pattern) { + case MachineCombinerPattern::REASSOC_AX_BY: + case MachineCombinerPattern::REASSOC_XA_BY: + Prev = MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); + break; + case MachineCombinerPattern::REASSOC_AX_YB: + case MachineCombinerPattern::REASSOC_XA_YB: + Prev = MRI.getUniqueVRegDef(Root.getOperand(2).getReg()); + break; + default: + break; + } + + assert(Prev && "Unknown pattern for machine combiner"); + + reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); + return; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + ArrayRef<unsigned> Ops, + MachineInstr *LoadMI) const { + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = nullptr; + int FrameIndex = 0; + + if ((MI->getOpcode() == TargetOpcode::STACKMAP || + MI->getOpcode() == TargetOpcode::PATCHPOINT) && + isLoadFromStackSlot(LoadMI, FrameIndex)) { + // Fold stackmap/patchpoint. + NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this); + if (NewMI) + NewMI = MBB.insert(MI, NewMI); + } else { + // Ask the target to do the actual folding. + NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, LoadMI); + } + + if (!NewMI) return nullptr; + + // Copy the memoperands from the load to the folded instruction. + if (MI->memoperands_empty()) { + NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + } + else { + // Handle the rare case of folding multiple loads. 
+ NewMI->setMemRefs(MI->memoperands_begin(), + MI->memoperands_end()); + for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(), + E = LoadMI->memoperands_end(); I != E; ++I) { + NewMI->addMemOperand(MF, *I); + } + } + return NewMI; +} + +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Remat clients assume operand 0 is the defined register. + if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) + return false; + unsigned DefReg = MI->getOperand(0).getReg(); + + // A sub-register definition can only be rematerialized if the instruction + // doesn't read the other parts of the register. Otherwise it is really a + // read-modify-write operation on the full virtual register which cannot be + // moved safely. + if (TargetRegisterInfo::isVirtualRegister(DefReg) && + MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) + return false; + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + // Avoid instructions obviously unsafe for remat. + if (MI->isNotDuplicable() || MI->mayStore() || + MI->hasUnmodeledSideEffects()) + return false; + + // Don't remat inline asm. We have no idea how expensive it is + // even if it's side effect free. + if (MI->isInlineAsm()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI.isConstantPhysReg(Reg, MF)) + return false; + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } + + // Only allow one virtual-register def. There may be multiple defs of the + // same virtual register, though. + if (MO.isDef() && Reg != DefReg) + return false; + + // Don't allow any virtual-register uses. Rematting an instruction with + // virtual register uses would length the live ranges of the uses, which + // is not necessarily a good idea, certainly not "trivial". + if (MO.isUse()) + return false; + } + + // Everything checked out. 
+ return true; +} + +int TargetInstrInfo::getSPAdjust(const MachineInstr *MI) const { + const MachineFunction *MF = MI->getParent()->getParent(); + const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering(); + bool StackGrowsDown = + TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown; + + unsigned FrameSetupOpcode = getCallFrameSetupOpcode(); + unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode(); + + if (MI->getOpcode() != FrameSetupOpcode && + MI->getOpcode() != FrameDestroyOpcode) + return 0; + + int SPAdj = MI->getOperand(0).getImm(); + SPAdj = TFI->alignSPAdjust(SPAdj); + + if ((!StackGrowsDown && MI->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && MI->getOpcode() == FrameDestroyOpcode)) + SPAdj = -SPAdj; + + return SPAdj; +} + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Terminators and labels can't be scheduled around. + if (MI->isTerminator() || MI->isPosition()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + return MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI); +} + +// Provide a global flag for disabling the PreRA hazard recognizer that targets +// may choose to honor. +bool TargetInstrInfo::usePreRAHazardRecognizer() const { + return !DisableHazardRecognizer; +} + +// Default implementation of CreateTargetRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + +// Default implementation of CreateTargetMIHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetMIHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "misched"); +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG latency interface. 
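// Illustrative aside: a standalone sketch of the sign convention used by
// getSPAdjust above, assuming a downward-growing stack and toy opcode tags
// (not the real call-frame pseudo opcodes).
#include <cstdio>

enum ToyOpcode { FrameSetup, FrameDestroy, Other };

static int spAdjust(ToyOpcode Op, int Amount, bool StackGrowsDown) {
  if (Op != FrameSetup && Op != FrameDestroy)
    return 0;
  // Mirror of the branch above: the adjustment is negated for frame-destroy
  // when the stack grows down (and for frame-setup when it grows up).
  if ((!StackGrowsDown && Op == FrameSetup) ||
      (StackGrowsDown && Op == FrameDestroy))
    return -Amount;
  return Amount;
}

int main() {
  std::printf("%d %d\n", spAdjust(FrameSetup, 16, true),
              spAdjust(FrameDestroy, 16, true)); // 16 -16
}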
+//===----------------------------------------------------------------------===// + +int +TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + +//===----------------------------------------------------------------------===// +// MachineInstr latency interface. +//===----------------------------------------------------------------------===// + +unsigned +TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Class = MI->getDesc().getSchedClass(); + int UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps >= 0) + return UOps; + + // The # of u-ops is dynamically determined. The specific target should + // override this function to return the right number. + return 1; +} + +/// Return the default expected latency for a def based on it's opcode. +unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel &SchedModel, + const MachineInstr *DefMI) const { + if (DefMI->isTransient()) + return 0; + if (DefMI->mayLoad()) + return SchedModel.LoadLatency; + if (isHighLatencyDef(DefMI->getOpcode())) + return SchedModel.HighLatency; + return 1; +} + +unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const { + return 0; +} + +unsigned TargetInstrInfo:: +getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return MI->mayLoad() ? 2 : 1; + + return ItinData->getStageLatency(MI->getDesc().getSchedClass()); +} + +bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel, + const MachineInstr *DefMI, + unsigned DefIdx) const { + const InstrItineraryData *ItinData = SchedModel.getInstrItineraries(); + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 1); +} + +/// Both DefMI and UseMI must be valid. By default, call directly to the +/// itinerary. This may be overriden by the target. +int TargetInstrInfo:: +getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned UseClass = UseMI->getDesc().getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +/// If we can determine the operand latency from the def only, without itinerary +/// lookup, do so. Otherwise return -1. 
+int TargetInstrInfo::computeDefOperandLatency( + const InstrItineraryData *ItinData, + const MachineInstr *DefMI) const { + + // Let the target hook getInstrLatency handle missing itineraries. + if (!ItinData) + return getInstrLatency(ItinData, DefMI); + + if(ItinData->isEmpty()) + return defaultDefLatency(ItinData->SchedModel, DefMI); + + // ...operand lookup required + return -1; +} + +/// computeOperandLatency - Compute and return the latency of the given data +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. +/// +/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. +unsigned TargetInstrInfo:: +computeOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + + int DefLatency = computeDefOperandLatency(ItinData, DefMI); + if (DefLatency >= 0) + return DefLatency; + + assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); + + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = getInstrLatency(ItinData, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); + return InstrLatency; +} + +bool TargetInstrInfo::getRegSequenceInputs( + const MachineInstr &MI, unsigned DefIdx, + SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const { + assert((MI.isRegSequence() || + MI.isRegSequenceLike()) && "Instruction do not have the proper type"); + + if (!MI.isRegSequence()) + return getRegSequenceLikeInputs(MI, DefIdx, InputRegs); + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + assert(DefIdx == 0 && "REG_SEQUENCE only has one def"); + for (unsigned OpIdx = 1, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx; + OpIdx += 2) { + const MachineOperand &MOReg = MI.getOperand(OpIdx); + const MachineOperand &MOSubIdx = MI.getOperand(OpIdx + 1); + assert(MOSubIdx.isImm() && + "One of the subindex of the reg_sequence is not an immediate"); + // Record Reg:SubReg, SubIdx. + InputRegs.push_back(RegSubRegPairAndIdx(MOReg.getReg(), MOReg.getSubReg(), + (unsigned)MOSubIdx.getImm())); + } + return true; +} + +bool TargetInstrInfo::getExtractSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPairAndIdx &InputReg) const { + assert((MI.isExtractSubreg() || + MI.isExtractSubregLike()) && "Instruction do not have the proper type"); + + if (!MI.isExtractSubreg()) + return getExtractSubregLikeInputs(MI, DefIdx, InputReg); + + // We are looking at: + // Def = EXTRACT_SUBREG v0.sub1, sub0. 
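// Illustrative aside: a standalone sketch of the latency fallback order used
// by computeOperandLatency above, with toy numbers; -1 stands for "no
// itinerary information", as in the functions here.
#include <algorithm>
#include <cstdio>

static unsigned operandLatency(int DefOnly, int PerOperand,
                               unsigned InstrLatency, unsigned DefaultDef) {
  if (DefOnly >= 0)
    return (unsigned)DefOnly;                // no itinerary lookup needed
  if (PerOperand >= 0)
    return (unsigned)PerOperand;             // itinerary knows this operand
  return std::max(InstrLatency, DefaultDef); // conservative fallback
}

int main() {
  std::printf("%u\n", operandLatency(-1, -1, 3, 4)); // falls through to 4
}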
+ assert(DefIdx == 0 && "EXTRACT_SUBREG only has one def"); + const MachineOperand &MOReg = MI.getOperand(1); + const MachineOperand &MOSubIdx = MI.getOperand(2); + assert(MOSubIdx.isImm() && + "The subindex of the extract_subreg is not an immediate"); + + InputReg.Reg = MOReg.getReg(); + InputReg.SubReg = MOReg.getSubReg(); + InputReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} + +bool TargetInstrInfo::getInsertSubregInputs( + const MachineInstr &MI, unsigned DefIdx, + RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const { + assert((MI.isInsertSubreg() || + MI.isInsertSubregLike()) && "Instruction do not have the proper type"); + + if (!MI.isInsertSubreg()) + return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg); + + // We are looking at: + // Def = INSERT_SEQUENCE v0, v1, sub0. + assert(DefIdx == 0 && "INSERT_SUBREG only has one def"); + const MachineOperand &MOBaseReg = MI.getOperand(1); + const MachineOperand &MOInsertedReg = MI.getOperand(2); + const MachineOperand &MOSubIdx = MI.getOperand(3); + assert(MOSubIdx.isImm() && + "One of the subindex of the reg_sequence is not an immediate"); + BaseReg.Reg = MOBaseReg.getReg(); + BaseReg.SubReg = MOBaseReg.getSubReg(); + + InsertedReg.Reg = MOInsertedReg.getReg(); + InsertedReg.SubReg = MOInsertedReg.getSubReg(); + InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm(); + return true; +} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp new file mode 100644 index 0000000..36a31c9 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -0,0 +1,1717 @@ +//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLoweringBase class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/StackMaps.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <cctype> +using namespace llvm; + +static cl::opt<bool> JumpIsExpensiveOverride( + "jump-is-expensive", cl::init(false), + cl::desc("Do not create extra branches to split comparison logic."), + cl::Hidden); + +/// InitLibcallNames - Set default libcall names. 
+/// +static void InitLibcallNames(const char **Names, const Triple &TT) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. + Names[RTLIB::SDIVREM_I8] = nullptr; + Names[RTLIB::SDIVREM_I16] = nullptr; + Names[RTLIB::SDIVREM_I32] = nullptr; + Names[RTLIB::SDIVREM_I64] = nullptr; + Names[RTLIB::SDIVREM_I128] = nullptr; + Names[RTLIB::UDIVREM_I8] = nullptr; + Names[RTLIB::UDIVREM_I16] = nullptr; + Names[RTLIB::UDIVREM_I32] = nullptr; + Names[RTLIB::UDIVREM_I64] = nullptr; + Names[RTLIB::UDIVREM_I128] = nullptr; + + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + 
Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_F128] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; + Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::FMIN_F32] = "fminf"; + Names[RTLIB::FMIN_F64] = "fmin"; + Names[RTLIB::FMIN_F80] = "fminl"; + Names[RTLIB::FMIN_F128] = "fminl"; + Names[RTLIB::FMIN_PPCF128] = "fminl"; + Names[RTLIB::FMAX_F32] = "fmaxf"; + Names[RTLIB::FMAX_F64] = "fmax"; + Names[RTLIB::FMAX_F80] = "fmaxl"; + Names[RTLIB::FMAX_F128] = "fmaxl"; + Names[RTLIB::FMAX_PPCF128] = "fmaxl"; + Names[RTLIB::ROUND_F32] = "roundf"; + Names[RTLIB::ROUND_F64] = "round"; + Names[RTLIB::ROUND_F80] = "roundl"; + Names[RTLIB::ROUND_F128] = "roundl"; + Names[RTLIB::ROUND_PPCF128] = "roundl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + 
Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F16] = "__truncdfhf2"; + Names[RTLIB::FPROUND_F80_F16] = "__truncxfhf2"; + Names[RTLIB::FPROUND_F128_F16] = "__trunctfhf2"; + Names[RTLIB::FPROUND_PPCF128_F16] = "__trunctfhf2"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + 
Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = 
"__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16"; + Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1"; + Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2"; + Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4"; + Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8"; + Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8"; + Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16"; + Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1"; + Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2"; + Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4"; + Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8"; + Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; + Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; + + if (TT.getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = nullptr; + Names[RTLIB::SINCOS_F64] = nullptr; + Names[RTLIB::SINCOS_F80] = nullptr; + Names[RTLIB::SINCOS_F128] = nullptr; + Names[RTLIB::SINCOS_PPCF128] = nullptr; + } + + if (!TT.isOSOpenBSD()) { + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; + } else { + // These are generally not available. + Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = nullptr; + } + + // For f16/f32 conversions, Darwin uses the standard naming scheme, instead + // of the gnueabi-style __gnu_*_ieee. + // FIXME: What about other targets? + if (TT.isOSDarwin()) { + Names[RTLIB::FPEXT_F16_F32] = "__extendhfsf2"; + Names[RTLIB::FPROUND_F32_F16] = "__truncsfhf2"; + } +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f16) { + if (RetVT == MVT::f32) + return FPEXT_F16_F32; + } else if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f16) { + if (OpVT == MVT::f32) + return FPROUND_F32_F16; + if (OpVT == MVT::f64) + return FPROUND_F64_F16; + if (OpVT == MVT::f80) + return FPROUND_F80_F16; + if (OpVT == MVT::f128) + return FPROUND_F128_F16; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F16; + } else if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
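// [Editor's note -- illustrative sketch, not part of the imported source.]
// The getFPEXT/getFPROUND/getFPTO*INT/get*INTTOFP helpers in this block map a
// (source type, result type) pair onto the RTLIB enumerators whose default
// symbol names were registered in InitLibcallNames() above.  For example,
// with the default table:
//
//   RTLIB::getFPROUND(MVT::f64, MVT::f16)  == RTLIB::FPROUND_F64_F16
//       -> Names[] entry "__truncdfhf2"
//   RTLIB::getFPEXT(MVT::f32, MVT::f64)    == RTLIB::FPEXT_F32_F64
//       -> Names[] entry "__extendsfdf2"
//
// On Darwin the table above overrides the f16<->f32 entries, so the same
// FPROUND_F32_F16 enumerator resolves to "__truncsfhf2" rather than
// "__gnu_f2h_ieee".
// [End editor's note.]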
+RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +RTLIB::Libcall RTLIB::getATOMIC(unsigned Opc, MVT VT) { +#define OP_TO_LIBCALL(Name, Enum) \ + case Name: \ + switch (VT.SimpleTy) { \ + default: \ + return UNKNOWN_LIBCALL; \ + case MVT::i8: \ + return Enum##_1; \ + case MVT::i16: \ + return Enum##_2; \ + case MVT::i32: \ + return Enum##_4; \ + case MVT::i64: \ + return Enum##_8; \ + case MVT::i128: \ + return Enum##_16; \ + } + + switch (Opc) { + OP_TO_LIBCALL(ISD::ATOMIC_SWAP, SYNC_LOCK_TEST_AND_SET) + OP_TO_LIBCALL(ISD::ATOMIC_CMP_SWAP, SYNC_VAL_COMPARE_AND_SWAP) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_ADD, SYNC_FETCH_AND_ADD) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_SUB, SYNC_FETCH_AND_SUB) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_AND, SYNC_FETCH_AND_AND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_OR, SYNC_FETCH_AND_OR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_XOR, SYNC_FETCH_AND_XOR) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_NAND, SYNC_FETCH_AND_NAND) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MAX, SYNC_FETCH_AND_MAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMAX, SYNC_FETCH_AND_UMAX) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_MIN, SYNC_FETCH_AND_MIN) + OP_TO_LIBCALL(ISD::ATOMIC_LOAD_UMIN, SYNC_FETCH_AND_UMIN) + } + +#undef OP_TO_LIBCALL + + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; + CCs[RTLIB::O_F128] = ISD::SETEQ; +} + +/// NOTE: The TargetMachine owns TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { + initActions(); + + // Perform these initializations only once. 
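// [Editor's note -- illustrative sketch, not part of the imported source.]
// The members initialized below are conservative, target-independent defaults
// (e.g. at most 8 stores when expanding memset/memcpy/memmove inline, or 4
// when optimizing for size).  A concrete backend normally adjusts them from
// its own TargetLowering constructor; the values here are hypothetical:
//
//   MaxStoresPerMemcpy = 16;                      // stores are cheap here
//   MaxStoresPerMemcpyOptSize = 8;
//   setSchedulingPreference(Sched::RegPressure);  // instead of Sched::ILP
//
// [End editor's note.]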
+ MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + HasMultipleConditionRegisters = false; + HasExtractBitsInsn = false; + FsqrtIsCheap = false; + JumpIsExpensive = JumpIsExpensiveOverride; + PredictableSelectIsExpensive = false; + MaskAndBranchFoldingIsLegal = false; + EnableExtLdPromotion = false; + HasFloatingPointExceptions = true; + StackPointerRegisterToSaveRestore = 0; + BooleanContents = UndefinedBooleanContent; + BooleanFloatContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + GatherAllAliasesMaxDepth = 6; + MinStackArgumentAlignment = 1; + InsertFencesForAtomic = false; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +void TargetLoweringBase::initActions() { + // All operations default to being supported. + memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + + // Set default actions for various operations. + for (MVT VT : MVT::all_valuetypes()) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, VT, Expand); + setIndexedStoreAction(IM, VT, Expand); + } + + // Most backends expect to see the node which just returns the value loaded. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand); + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FMINNAN, VT, Expand); + setOperationAction(ISD::FMAXNAN, VT, Expand); + setOperationAction(ISD::FMAD, VT, Expand); + setOperationAction(ISD::SMIN, VT, Expand); + setOperationAction(ISD::SMAX, VT, Expand); + setOperationAction(ISD::UMIN, VT, Expand); + setOperationAction(ISD::UMAX, VT, Expand); + + // Overflow operations default to expand + setOperationAction(ISD::SADDO, VT, Expand); + setOperationAction(ISD::SSUBO, VT, Expand); + setOperationAction(ISD::UADDO, VT, Expand); + setOperationAction(ISD::USUBO, VT, Expand); + setOperationAction(ISD::SMULO, VT, Expand); + setOperationAction(ISD::UMULO, VT, Expand); + + setOperationAction(ISD::BITREVERSE, VT, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FROUND, VT, Expand); + + // These operations default to expand for vector types. 
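// [Editor's note -- illustrative sketch, not part of the imported source.]
// initActions() only establishes defaults; a backend's constructor then
// selectively overrides them per operation and per type, for example
// (hypothetical choices):
//
//   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);   // hardware min
//   setOperationAction(ISD::SADDO,   MVT::i32, Custom);  // custom lowering
//
// Anything left as Expand is later rewritten by the legalizer into other
// supported operations or a library call where possible.
// [End editor's note.]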
+ if (VT.isVector()) { + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand); + } + + // For most targets @llvm.get.dynamic.area.offest just returns 0. + setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // Most targets also ignore the @llvm.readcyclecounter intrinsic. + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + + // These library functions default to expand. + for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) { + setOperationAction(ISD::FLOG , VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); + setOperationAction(ISD::FLOG10, VT, Expand); + setOperationAction(ISD::FEXP , VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FFLOOR, VT, Expand); + setOperationAction(ISD::FMINNUM, VT, Expand); + setOperationAction(ISD::FMAXNUM, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); + setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FTRUNC, VT, Expand); + setOperationAction(ISD::FROUND, VT, Expand); + } + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); +} + +MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL, + EVT) const { + return MVT::getIntegerVT(8 * DL.getPointerSize(0)); +} + +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, + const DataLayout &DL) const { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return getScalarShiftAmountTy(DL, LHSTy); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + +void TargetLoweringBase::setJumpIsExpensive(bool isExpensive) { + // If the command-line option was specified, ignore this request. + if (!JumpIsExpensiveOverride.getNumOccurrences()) + JumpIsExpensive = isExpensive; +} + +TargetLoweringBase::LegalizeKind +TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const { + // If this is a simple type, use the ComputeRegisterProp mechanism. 
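// [Editor's note -- illustrative sketch, not part of the imported source.]
// Example results of this routine, assuming a target on which i64, f64 and
// v4f32 are the widest legal types of their kind:
//
//   getTypeConversion(C, EVT::getIntegerVT(C, 33))
//       -> (TypePromoteInteger, i64)    // round i33 up to the next
//                                       // power-of-two integer type
//   getTypeConversion(C, EVT::getVectorVT(C, MVT::f32, 3))
//       -> (TypeWidenVector, v4f32)     // widen <3 x float> to a legal
//                                       // power-of-two vector
//   getTypeConversion(C, EVT::getIntegerVT(C, 256))
//       -> (TypeExpandInteger, i128)    // halve until a legal size is
//                                       // reached (i128 -> i64 next step)
// [End editor's note.]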
+ if (VT.isSimple()) { + MVT SVT = VT.getSimpleVT(); + assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType)); + MVT NVT = TransformToType[SVT.SimpleTy]; + LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT); + + assert((LA == TypeLegal || LA == TypeSoftenFloat || + ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) && + "Promote may not follow Expand or Promote"); + + if (LA == TypeSplitVector) + return LegalizeKind(LA, + EVT::getVectorVT(Context, SVT.getVectorElementType(), + SVT.getVectorNumElements() / 2)); + if (LA == TypeScalarizeVector) + return LegalizeKind(LA, SVT.getVectorElementType()); + return LegalizeKind(LA, NVT); + } + + // Handle Extended Scalar Types. + if (!VT.isVector()) { + assert(VT.isInteger() && "Float types must be simple"); + unsigned BitSize = VT.getSizeInBits(); + // First promote to a power-of-two size, then expand if necessary. + if (BitSize < 8 || !isPowerOf2_32(BitSize)) { + EVT NVT = VT.getRoundIntegerType(Context); + assert(NVT != VT && "Unable to round integer VT"); + LegalizeKind NextStep = getTypeConversion(Context, NVT); + // Avoid multi-step promotion. + if (NextStep.first == TypePromoteInteger) + return NextStep; + // Return rounded integer type. + return LegalizeKind(TypePromoteInteger, NVT); + } + + return LegalizeKind(TypeExpandInteger, + EVT::getIntegerVT(Context, VT.getSizeInBits() / 2)); + } + + // Handle vector types. + unsigned NumElts = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + + // Vectors with only one element are always scalarized. + if (NumElts == 1) + return LegalizeKind(TypeScalarizeVector, EltVT); + + // Try to widen vector elements until the element type is a power of two and + // promote it to a legal type later on, for example: + // <3 x i8> -> <4 x i8> -> <4 x i32> + if (EltVT.isInteger()) { + // Vectors with a number of elements that is not a power of two are always + // widened, for example <3 x i8> -> <4 x i8>. + if (!VT.isPow2VectorType()) { + NumElts = (unsigned)NextPowerOf2(NumElts); + EVT NVT = EVT::getVectorVT(Context, EltVT, NumElts); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Examine the element type. + LegalizeKind LK = getTypeConversion(Context, EltVT); + + // If type is to be expanded, split the vector. + // <4 x i140> -> <2 x i140> + if (LK.first == TypeExpandInteger) + return LegalizeKind(TypeSplitVector, + EVT::getVectorVT(Context, EltVT, NumElts / 2)); + + // Promote the integer element types until a legal vector type is found + // or until the element integer type is too big. If a legal type was not + // found, fallback to the usual mechanism of widening/splitting the + // vector. + EVT OldEltVT = EltVT; + while (1) { + // Increase the bitwidth of the element to the next pow-of-two + // (which is greater than 8 bits). + EltVT = EVT::getIntegerVT(Context, 1 + EltVT.getSizeInBits()) + .getRoundIntegerType(Context); + + // Stop trying when getting a non-simple element type. + // Note that vector elements may be greater than legal vector element + // types. Example: X86 XMM registers hold 64bit element on 32bit + // systems. + if (!EltVT.isSimple()) + break; + + // Build a new vector type and check if it is legal. + MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + // Found a legal promoted vector type. 
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal) + return LegalizeKind(TypePromoteInteger, + EVT::getVectorVT(Context, EltVT, NumElts)); + } + + // Reset the type to the unexpanded type if we did not find a legal vector + // type with a promoted vector element type. + EltVT = OldEltVT; + } + + // Try to widen the vector until a legal type is found. + // If there is no wider legal type, split the vector. + while (1) { + // Round up to the next power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + // If there is no simple vector type with this many elements then there + // cannot be a larger legal vector type. Note that this assumes that + // there are no skipped intermediate vector types in the simple types. + if (!EltVT.isSimple()) + break; + MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); + if (LargerVector == MVT()) + break; + + // If this type is legal then widen the vector. + if (ValueTypeActions.getTypeAction(LargerVector) == TypeLegal) + return LegalizeKind(TypeWidenVector, LargerVector); + } + + // Widen odd vectors to next power of two. + if (!VT.isPow2VectorType()) { + EVT NVT = VT.getPow2VectorType(Context); + return LegalizeKind(TypeWidenVector, NVT); + } + + // Vectors with illegal element types are expanded. + EVT NVT = EVT::getVectorVT(Context, EltVT, VT.getVectorNumElements() / 2); + return LegalizeKind(TypeSplitVector, NVT); +} + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT, + TargetLoweringBase *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + MVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// Replace/modify any TargetFrameIndex operands with a targte-dependent +/// sequence of memory operands that is recognized by PrologEpilogInserter. 
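// [Editor's note -- illustrative sketch, not part of the imported source.]
// Effect of the rewriting performed below on a single frame-index operand
// (operand lists abbreviated):
//
//   PATCHPOINT ..., <fi#3>, ...
//     -> PATCHPOINT ..., DirectMemRefOp, <fi#3>, 0, ...
//
//   STATEPOINT ..., <fi#5>, ...        ; fi#5 is a statepoint spill slot
//     -> STATEPOINT ..., IndirectMemRefOp, <size of fi#5>, <fi#5>, 0, ...
//
// so that StackMaps and PrologEpilogInserter can later resolve the frame
// index to a concrete stack address.
// [End editor's note.]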
+MachineBasicBlock* +TargetLoweringBase::emitPatchPoint(MachineInstr *MI, + MachineBasicBlock *MBB) const { + MachineFunction &MF = *MI->getParent()->getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + // We're handling multiple types of operands here: + // PATCHPOINT MetaArgs - live-in, read only, direct + // STATEPOINT Deopt Spill - live-through, read only, indirect + // STATEPOINT Deopt Alloca - live-through, read only, direct + // (We're currently conservative and mark the deopt slots read/write in + // practice.) + // STATEPOINT GC Spill - live-through, read/write, indirect + // STATEPOINT GC Alloca - live-through, read/write, direct + // The live-in vs live-through is handled already (the live through ones are + // all stack slots), but we need to handle the different type of stackmap + // operands and memory effects here. + + // MI changes inside this loop as we grow operands. + for(unsigned OperIdx = 0; OperIdx != MI->getNumOperands(); ++OperIdx) { + MachineOperand &MO = MI->getOperand(OperIdx); + if (!MO.isFI()) + continue; + + // foldMemoryOperand builds a new MI after replacing a single FI operand + // with the canonical set of five x86 addressing-mode operands. + int FI = MO.getIndex(); + MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), MI->getDesc()); + + // Copy operands before the frame-index. + for (unsigned i = 0; i < OperIdx; ++i) + MIB.addOperand(MI->getOperand(i)); + // Add frame index operands recognized by stackmaps.cpp + if (MFI.isStatepointSpillSlotObjectIndex(FI)) { + // indirect-mem-ref tag, size, #FI, offset. + // Used for spills inserted by StatepointLowering. This codepath is not + // used for patchpoints/stackmaps at all, for these spilling is done via + // foldMemoryOperand callback only. + assert(MI->getOpcode() == TargetOpcode::STATEPOINT && "sanity"); + MIB.addImm(StackMaps::IndirectMemRefOp); + MIB.addImm(MFI.getObjectSize(FI)); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } else { + // direct-mem-ref tag, #FI, offset. + // Used by patchpoint, and direct alloca arguments to statepoints + MIB.addImm(StackMaps::DirectMemRefOp); + MIB.addOperand(MI->getOperand(OperIdx)); + MIB.addImm(0); + } + // Copy the operands after the frame index. + for (unsigned i = OperIdx + 1; i != MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + + // Inherit previous memory operands. + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + assert(MIB->mayLoad() && "Folded a stackmap use to a non-load!"); + + // Add a new memory operand for this FI. + assert(MFI.getObjectOffset(FI) != -1); + + unsigned Flags = MachineMemOperand::MOLoad; + if (MI->getOpcode() == TargetOpcode::STATEPOINT) { + Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOVolatile; + } + MachineMemOperand *MMO = MF.getMachineMemOperand( + MachinePointerInfo::getFixedStack(MF, FI), Flags, + MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI)); + MIB->addMemOperand(MF, MMO); + + // Replace the instruction and update the operand index. + MBB->insert(MachineBasicBlock::iterator(MI), MIB); + OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1; + MI->eraseFromParent(); + MI = MIB; + } + return MBB; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". 
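// [Editor's note -- illustrative sketch, not part of the imported source.]
// The implementation below starts from the register class registered for VT
// and keeps the largest legal super-register class by spill size, returning
// it with a constant cost of 1.  Mirroring the i386/x86_64 example given in
// computeRegisterProperties() further down, on x86-64 one would expect:
//
//   std::pair<const TargetRegisterClass *, uint8_t> P =
//       findRepresentativeClass(TRI, MVT::i8);
//   // P.first == &X86::GR64RegClass, P.second == 1
//
// [End editor's note.]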
+// This function is in TargetLowering because it uses RegClassForVT which would +// need to be moved to TargetRegisterInfo and would necessitate moving +// isTypeLegal over as well - a massive change that would just require +// TargetLowering having a TargetRegisterInfo class member that it would use. +std::pair<const TargetRegisterClass *, uint8_t> +TargetLoweringBase::findRepresentativeClass(const TargetRegisterInfo *TRI, + MVT VT) const { + const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + + // Compute the set of all super-register classes. + BitVector SuperRegRC(TRI->getNumRegClasses()); + for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) + SuperRegRC.setBitsInMask(RCI.getMask()); + + // Find the first legal register class with the largest spill size. + const TargetRegisterClass *BestRC = RC; + for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + const TargetRegisterClass *SuperRC = TRI->getRegClass(i); + // We want the largest possible spill size. + if (SuperRC->getSize() <= BestRC->getSize()) + continue; + if (!isLegalRC(SuperRC)) + continue; + BestRC = SuperRC; + } + return std::make_pair(BestRC, 1); +} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLoweringBase::computeRegisterProperties( + const TargetRegisterInfo *TRI) { + static_assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE, + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == nullptr; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. + for (unsigned ExpandedReg = LargestIntReg + 1; + ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, + TypeExpandInteger); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (const MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); + } + + // Decide how to handle f128. 
If the target does not have native f128 support, + // expand it to i128 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f128)) { + NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; + RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; + TransformToType[MVT::f128] = MVT::i128; + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); + } + + // Decide how to handle f32. If the target does not have native f32 support, + // expand it to i32 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f32)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); + } + + // Decide how to handle f16. If the target does not have native f16 support, + // promote it to f32, because there are no f16 library calls (except for + // conversions). + if (!isTypeLegal(MVT::f16)) { + NumRegistersForVT[MVT::f16] = NumRegistersForVT[MVT::f32]; + RegisterTypeForVT[MVT::f16] = RegisterTypeForVT[MVT::f32]; + TransformToType[MVT::f16] = MVT::f32; + ValueTypeActions.setTypeAction(MVT::f16, TypePromoteFloat); + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType) i; + if (isTypeLegal(VT)) + continue; + + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + bool IsLegalWiderType = false; + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); + switch (PreferredAction) { + case TypePromoteInteger: { + // Try to promote the elements of integer vectors. If no legal + // promotion was found, fall through to the widen-vector method. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. + if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) + && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) + break; + } + case TypeWidenVector: { + // Try to widen the vector. 
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) + break; + } + case TypeSplitVector: + case TypeScalarizeVector: { + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + NumIntermediates, RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + if (PreferredAction == TypeScalarizeVector) + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); + else if (PreferredAction == TypeSplitVector) + ValueTypeActions.setTypeAction(VT, TypeSplitVector); + else + // Set type action according to the number of elements. + ValueTypeActions.setTypeAction(VT, NElts == 1 ? TypeScalarizeVector + : TypeSplitVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + break; + } + default: + llvm_unreachable("Unknown vector legalization action!"); + } + } + + // Determine the 'representative' register class for each value type. + // An representative register class is the largest (meaning one which is + // not a sub-register class / subreg register class) legal register class for + // a group of value types. For example, on i386, i8, i16, and i32 + // representative would be GR32; while on x86_64 it's GR64. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + const TargetRegisterClass* RRC; + uint8_t Cost; + std::tie(RRC, Cost) = findRepresentativeClass(TRI, (MVT::SimpleValueType)i); + RepRegClassForVT[i] = RRC; + RepRegClassCostForVT[i] = Cost; + } +} + +EVT TargetLoweringBase::getSetCCResultType(const DataLayout &DL, LLVMContext &, + EVT VT) const { + assert(!VT.isVector() && "No default SetCC type for vectors!"); + return getPointerTy(DL).SimpleTy; +} + +MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { + return MVT::i32; // return the default value +} + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. +/// +unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // or a promoted vector type that has the same number of elements which + // are wider, then we should convert to that legal vector type. + // This handles things like <2 x float> -> <4 x float> and + // <4 x i1> -> <4 x i32>. 
+ LegalizeTypeAction TA = getTypeAction(Context, VT); + if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + EVT RegisterEVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterEVT)) { + IntermediateVT = RegisterEVT; + RegisterVT = RegisterEVT.getSimpleVT(); + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. + EVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(Type *ReturnType, AttributeSet attr, + SmallVectorImpl<ISD::OutputArg> &Outs, + const TargetLowering &TLI, const DataLayout &DL) { + SmallVector<EVT, 4> ValueVTs; + ComputeValueVTs(TLI, DL, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
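// [Editor's note -- illustrative sketch, not part of the imported source.]
// Example of the promotion applied below: for a function declared as
//
//   define signext i8 @f()
//
// ExtendKind is SIGN_EXTEND, so the i8 return value is widened to the
// register type for MVT::i32 and reported as a single output part with the
// SExt flag set (assuming a target whose integer registers are 32 bits or
// wider).
// [End editor's note.]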
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + Flags.setSExt(); + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0)); + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty, + const DataLayout &DL) const { + return DL.getABITypeAlignment(Ty); +} + +bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context, + const DataLayout &DL, EVT VT, + unsigned AddrSpace, + unsigned Alignment, + bool *Fast) const { + // Check if the specified alignment is sufficient based on the data layout. + // TODO: While using the data layout works in practice, a better solution + // would be to implement this check directly (make this a virtual function). + // For example, the ABI alignment may change based on software platform while + // this function should only be affected by hardware implementation. + Type *Ty = VT.getTypeForEVT(Context); + if (Alignment >= DL.getABITypeAlignment(Ty)) { + // Assume that an access that meets the ABI-specified alignment is fast. + if (Fast != nullptr) + *Fast = true; + return true; + } + + // This is a misaligned access. 
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast); +} + + +//===----------------------------------------------------------------------===// +// TargetTransformInfo Helpers +//===----------------------------------------------------------------------===// + +int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { + enum InstructionOpcodes { +#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, +#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM +#include "llvm/IR/Instruction.def" + }; + switch (static_cast<InstructionOpcodes>(Opcode)) { + case Ret: return 0; + case Br: return 0; + case Switch: return 0; + case IndirectBr: return 0; + case Invoke: return 0; + case Resume: return 0; + case Unreachable: return 0; + case CleanupRet: return 0; + case CatchRet: return 0; + case CatchPad: return 0; + case CatchSwitch: return 0; + case CleanupPad: return 0; + case Add: return ISD::ADD; + case FAdd: return ISD::FADD; + case Sub: return ISD::SUB; + case FSub: return ISD::FSUB; + case Mul: return ISD::MUL; + case FMul: return ISD::FMUL; + case UDiv: return ISD::UDIV; + case SDiv: return ISD::SDIV; + case FDiv: return ISD::FDIV; + case URem: return ISD::UREM; + case SRem: return ISD::SREM; + case FRem: return ISD::FREM; + case Shl: return ISD::SHL; + case LShr: return ISD::SRL; + case AShr: return ISD::SRA; + case And: return ISD::AND; + case Or: return ISD::OR; + case Xor: return ISD::XOR; + case Alloca: return 0; + case Load: return ISD::LOAD; + case Store: return ISD::STORE; + case GetElementPtr: return 0; + case Fence: return 0; + case AtomicCmpXchg: return 0; + case AtomicRMW: return 0; + case Trunc: return ISD::TRUNCATE; + case ZExt: return ISD::ZERO_EXTEND; + case SExt: return ISD::SIGN_EXTEND; + case FPToUI: return ISD::FP_TO_UINT; + case FPToSI: return ISD::FP_TO_SINT; + case UIToFP: return ISD::UINT_TO_FP; + case SIToFP: return ISD::SINT_TO_FP; + case FPTrunc: return ISD::FP_ROUND; + case FPExt: return ISD::FP_EXTEND; + case PtrToInt: return ISD::BITCAST; + case IntToPtr: return ISD::BITCAST; + case BitCast: return ISD::BITCAST; + case AddrSpaceCast: return ISD::ADDRSPACECAST; + case ICmp: return ISD::SETCC; + case FCmp: return ISD::SETCC; + case PHI: return 0; + case Call: return 0; + case Select: return ISD::SELECT; + case UserOp1: return 0; + case UserOp2: return 0; + case VAArg: return 0; + case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; + case InsertElement: return ISD::INSERT_VECTOR_ELT; + case ShuffleVector: return ISD::VECTOR_SHUFFLE; + case ExtractValue: return ISD::MERGE_VALUES; + case InsertValue: return ISD::MERGE_VALUES; + case LandingPad: return 0; + } + + llvm_unreachable("Unknown instruction type encountered!"); +} + +std::pair<int, MVT> +TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, + Type *Ty) const { + LLVMContext &C = Ty->getContext(); + EVT MTy = getValueType(DL, Ty); + + int Cost = 1; + // We keep legalizing the type until we find a legal kind. We assume that + // the only operation that costs anything is the split. After splitting + // we need to handle two types. + while (true) { + LegalizeKind LK = getTypeConversion(C, MTy); + + if (LK.first == TypeLegal) + return std::make_pair(Cost, MTy.getSimpleVT()); + + if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) + Cost *= 2; + + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + + // Keep legalizing the type. 
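// [Editor's note -- illustrative sketch, not part of the imported source.]
// Worked example for this loop: on a target whose widest legal vector type
// is v4i32, getTypeLegalizationCost(DL, <16 x i32>) iterates
//
//   v16i32 -> v8i32 (Cost = 2) -> v4i32 (Cost = 4) -> legal
//
// and returns {4, MVT::v4i32}; only vector splits and integer expansion
// double the cost.
// [End editor's note.]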
+ MTy = LK.second; + } +} + +Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const { + if (!TM.getTargetTriple().isAndroid()) + return nullptr; + + // Android provides a libc function to retrieve the address of the current + // thread's unsafe stack pointer. + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + Type *StackPtrTy = Type::getInt8PtrTy(M->getContext()); + Value *Fn = M->getOrInsertFunction("__safestack_pointer_address", + StackPtrTy->getPointerTo(0), nullptr); + return IRB.CreateCall(Fn); +} + +//===----------------------------------------------------------------------===// +// Loop Strength Reduction hooks +//===----------------------------------------------------------------------===// + +/// isLegalAddressingMode - Return true if the addressing mode represented +/// by AM is legal for this target, for a load/store of the specified type. +bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL, + const AddrMode &AM, Type *Ty, + unsigned AS) const { + // The default implementation of this implements a conservative RISCy, r+r and + // r+i addr mode. + + // Allows a sign-extended 16-bit immediate field. + if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) + return false; + + // No global is ever allowed as a base. + if (AM.BaseGV) + return false; + + // Only support r+r, + switch (AM.Scale) { + case 0: // "r+i" or just "i", depending on HasBaseReg. + break; + case 1: + if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. + return false; + // Otherwise we have r+r or r+i. + break; + case 2: + if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. + return false; + // Allow 2*r as r+r. + break; + default: // Don't allow n * r + return false; + } + + return true; +} diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp new file mode 100644 index 0000000..58ae9cc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -0,0 +1,1078 @@ +//===-- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements classes used to handle lowerings specific to common +// object file formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Mangler.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbolELF.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/COFF.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ELF.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; +using namespace dwarf; + +//===----------------------------------------------------------------------===// +// ELF +//===----------------------------------------------------------------------===// + +MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) + return getContext().getOrCreateSymbol(StringRef("DW.ref.") + + TM.getSymbol(GV, Mang)->getName()); + if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) + return TM.getSymbol(GV, Mang); + report_fatal_error("We do not support this DWARF encoding yet!"); +} + +void TargetLoweringObjectFileELF::emitPersonalityValue( + MCStreamer &Streamer, const DataLayout &DL, const MCSymbol *Sym) const { + SmallString<64> NameData("DW.ref."); + NameData += Sym->getName(); + MCSymbolELF *Label = + cast<MCSymbolELF>(getContext().getOrCreateSymbol(NameData)); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + StringRef Prefix = ".data."; + NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end()); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + MCSection *Sec = getContext().getELFSection(NameData, ELF::SHT_PROGBITS, + Flags, 0, Label->getName()); + unsigned Size = DL.getPointerSize(); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(DL.getPointerABIAlignment()); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + const MCExpr *E = MCConstantExpr::create(Size, getContext()); + Streamer.emitELFSize(Label, E); + Streamer.EmitLabel(Label); + + Streamer.EmitSymbolValue(Sym, Size); +} + +const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>(); + + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM); + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. 
+ MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); + if (!StubSym.getPointer()) { + MCSymbol *Sym = TM.getSymbol(GV, Mang); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile:: + getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer); +} + +static SectionKind +getELFKindForNamedSection(StringRef Name, SectionKind K) { + // N.B.: The defaults used in here are no the same ones used in MC. + // We follow gcc, MC follows gas. For example, given ".section .eh_frame", + // both gas and MC will produce a section with no flags. Given + // section(".eh_frame") gcc will produce: + // + // .section .eh_frame,"a",@progbits + if (Name.empty() || Name[0] != '.') return K; + + // Some lame default implementation based on some magic section names. + if (Name == ".bss" || + Name.startswith(".bss.") || + Name.startswith(".gnu.linkonce.b.") || + Name.startswith(".llvm.linkonce.b.") || + Name == ".sbss" || + Name.startswith(".sbss.") || + Name.startswith(".gnu.linkonce.sb.") || + Name.startswith(".llvm.linkonce.sb.")) + return SectionKind::getBSS(); + + if (Name == ".tdata" || + Name.startswith(".tdata.") || + Name.startswith(".gnu.linkonce.td.") || + Name.startswith(".llvm.linkonce.td.")) + return SectionKind::getThreadData(); + + if (Name == ".tbss" || + Name.startswith(".tbss.") || + Name.startswith(".gnu.linkonce.tb.") || + Name.startswith(".llvm.linkonce.tb.")) + return SectionKind::getThreadBSS(); + + return K; +} + + +static unsigned getELFSectionType(StringRef Name, SectionKind K) { + + if (Name == ".init_array") + return ELF::SHT_INIT_ARRAY; + + if (Name == ".fini_array") + return ELF::SHT_FINI_ARRAY; + + if (Name == ".preinit_array") + return ELF::SHT_PREINIT_ARRAY; + + if (K.isBSS() || K.isThreadBSS()) + return ELF::SHT_NOBITS; + + return ELF::SHT_PROGBITS; +} + +static unsigned getELFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (!K.isMetadata()) + Flags |= ELF::SHF_ALLOC; + + if (K.isText()) + Flags |= ELF::SHF_EXECINSTR; + + if (K.isWriteable()) + Flags |= ELF::SHF_WRITE; + + if (K.isThreadLocal()) + Flags |= ELF::SHF_TLS; + + if (K.isMergeableCString() || K.isMergeableConst()) + Flags |= ELF::SHF_MERGE; + + if (K.isMergeableCString()) + Flags |= ELF::SHF_STRINGS; + + return Flags; +} + +static const Comdat *getELFComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return nullptr; + + if (C->getSelectionKind() != Comdat::Any) + report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" + + C->getName() + "' cannot be lowered."); + + return C; +} + +MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + StringRef SectionName = GV->getSection(); + + // Infer section flags from the section name if we can. + Kind = getELFKindForNamedSection(SectionName, Kind); + + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (const Comdat *C = getELFComdat(GV)) { + Group = C->getName(); + Flags |= ELF::SHF_GROUP; + } + return getContext().getELFSection(SectionName, + getELFSectionType(SectionName, Kind), Flags, + /*EntrySize=*/0, Group); +} + +/// Return the section prefix name used by options FunctionsSections and +/// DataSections. 
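// [Editor's note -- illustrative sketch, not part of the imported source.]
// Sample section names produced by selectELFSectionForGlobal() below when
// -ffunction-sections/-fdata-sections and unique section names are enabled
// (exact digits depend on entry size and alignment):
//
//   define void @foo()           -> .text.foo
//   @bar = global i32 0          -> .data.bar
//   8-byte mergeable constant    -> .rodata.cst8
//   1-byte-aligned C string      -> .rodata.str1.1
//
// Without those options, globals simply land in the plain prefix sections
// returned by getSectionPrefixForGlobal() (.text, .data, .rodata, ...).
// [End editor's note.]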
+static StringRef getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isData()) + return ".data"; + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return ".data.rel.ro"; +} + +static MCSectionELF * +selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV, + SectionKind Kind, Mangler &Mang, + const TargetMachine &TM, bool EmitUniqueSection, + unsigned Flags, unsigned *NextUniqueID) { + unsigned EntrySize = 0; + if (Kind.isMergeableCString()) { + if (Kind.isMergeable2ByteCString()) { + EntrySize = 2; + } else if (Kind.isMergeable4ByteCString()) { + EntrySize = 4; + } else { + EntrySize = 1; + assert(Kind.isMergeable1ByteCString() && "unknown string width"); + } + } else if (Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) { + EntrySize = 4; + } else if (Kind.isMergeableConst8()) { + EntrySize = 8; + } else { + assert(Kind.isMergeableConst16() && "unknown data width"); + EntrySize = 16; + } + } + + StringRef Group = ""; + if (const Comdat *C = getELFComdat(GV)) { + Flags |= ELF::SHF_GROUP; + Group = C->getName(); + } + + bool UniqueSectionNames = TM.getUniqueSectionNames(); + SmallString<128> Name; + if (Kind.isMergeableCString()) { + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! + unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)); + + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Align); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); + } + + if (EmitUniqueSection && UniqueSectionNames) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GV, Mang, true); + } + unsigned UniqueID = ~0; + if (EmitUniqueSection && !UniqueSectionNames) { + UniqueID = *NextUniqueID; + (*NextUniqueID)++; + } + return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags, + EntrySize, Group, UniqueID); +} + +MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + unsigned Flags = getELFSectionFlags(Kind); + + // If we have -ffunction-section or -fdata-section then we should emit the + // global value to a uniqued section specifically for it. + bool EmitUniqueSection = false; + if (!(Flags & ELF::SHF_MERGE) && !Kind.isCommon()) { + if (Kind.isText()) + EmitUniqueSection = TM.getFunctionSections(); + else + EmitUniqueSection = TM.getDataSections(); + } + EmitUniqueSection |= GV->hasComdat(); + + return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM, + EmitUniqueSection, Flags, &NextUniqueID); +} + +MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. 
+ const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; + + return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(), + Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC, + &NextUniqueID); +} + +bool TargetLoweringObjectFileELF::shouldPutJumpTableInFunctionSection( + bool UsesLabelDifference, const Function &F) const { + // We can always create relative relocations, so use another section + // that can be marked non-executable. + return false; +} + +/// Given a mergeable constant with the specified size and relocation +/// information, return a section that it should be placed in. +MCSection *TargetLoweringObjectFileELF::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { + if (Kind.isMergeableConst4() && MergeableConst4Section) + return MergeableConst4Section; + if (Kind.isMergeableConst8() && MergeableConst8Section) + return MergeableConst8Section; + if (Kind.isMergeableConst16() && MergeableConst16Section) + return MergeableConst16Section; + if (Kind.isReadOnly()) + return ReadOnlySection; + + assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); + return DataRelROSection; +} + +static MCSectionELF *getStaticStructorSection(MCContext &Ctx, bool UseInitArray, + bool IsCtor, unsigned Priority, + const MCSymbol *KeySym) { + std::string Name; + unsigned Type; + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE; + StringRef COMDAT = KeySym ? KeySym->getName() : ""; + + if (KeySym) + Flags |= ELF::SHF_GROUP; + + if (UseInitArray) { + if (IsCtor) { + Type = ELF::SHT_INIT_ARRAY; + Name = ".init_array"; + } else { + Type = ELF::SHT_FINI_ARRAY; + Name = ".fini_array"; + } + if (Priority != 65535) { + Name += '.'; + Name += utostr(Priority); + } + } else { + // The default scheme is .ctor / .dtor, so we have to invert the priority + // numbering. + if (IsCtor) + Name = ".ctors"; + else + Name = ".dtors"; + if (Priority != 65535) { + Name += '.'; + Name += utostr(65535 - Priority); + } + Type = ELF::SHT_PROGBITS; + } + + return Ctx.getELFSection(Name, Type, Flags, 0, COMDAT); +} + +MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getStaticStructorSection(getContext(), UseInitArray, true, Priority, + KeySym); +} + +MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getStaticStructorSection(getContext(), UseInitArray, false, Priority, + KeySym); +} + +void +TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { + UseInitArray = UseInitArray_; + if (!UseInitArray) + return; + + StaticCtorSection = getContext().getELFSection( + ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); + StaticDtorSection = getContext().getELFSection( + ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC); +} + +//===----------------------------------------------------------------------===// +// MachO +//===----------------------------------------------------------------------===// + +TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO() + : TargetLoweringObjectFile() { + SupportIndirectSymViaGOTPCRel = true; +} + +/// emitModuleFlags - Perform code emission for module flags. 
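+/// For Mach-O this emits any "Linker Options" module flag through the
+/// streamer and lowers the Objective-C image-info values into the
+/// L_OBJC_IMAGE_INFO symbol in the section named by
+/// "Objective-C Image Info Section".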
+void TargetLoweringObjectFileMachO:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler &Mang, const TargetMachine &TM) const { + unsigned VersionVal = 0; + unsigned ImageInfoFlags = 0; + MDNode *LinkerOptions = nullptr; + StringRef SectionVal; + + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + + // Ignore flags with 'Require' behavior. + if (MFE.Behavior == Module::Require) + continue; + + StringRef Key = MFE.Key->getString(); + Metadata *Val = MFE.Val; + + if (Key == "Objective-C Image Info Version") { + VersionVal = mdconst::extract<ConstantInt>(Val)->getZExtValue(); + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated" || + Key == "Objective-C Image Swift Version") { + ImageInfoFlags |= mdconst::extract<ConstantInt>(Val)->getZExtValue(); + } else if (Key == "Objective-C Image Info Section") { + SectionVal = cast<MDString>(Val)->getString(); + } else if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + } + } + + // Emit the linker options if present. + if (LinkerOptions) { + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + SmallVector<std::string, 4> StrOptions; + + // Convert to strings. + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + StrOptions.push_back(MDOption->getString()); + } + + Streamer.EmitLinkerOptions(StrOptions); + } + } + + // The section is mandatory. If we don't have it, then we don't have GC info. + if (SectionVal.empty()) return; + + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(SectionVal, Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) + // If invalid, report the error with report_fatal_error. + report_fatal_error("Invalid section specifier '" + Section + "': " + + ErrorCode + "."); + + // Get the section. + MCSectionMachO *S = getContext().getMachOSection( + Segment, Section, TAA, StubSize, SectionKind::getData()); + Streamer.SwitchSection(S); + Streamer.EmitLabel(getContext(). + getOrCreateSymbol(StringRef("L_OBJC_IMAGE_INFO"))); + Streamer.EmitIntValue(VersionVal, 4); + Streamer.EmitIntValue(ImageInfoFlags, 4); + Streamer.AddBlankLine(); +} + +static void checkMachOComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return; + + report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() + + "' cannot be lowered."); +} + +MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + // Parse the section specifier and create it if valid. + StringRef Segment, Section; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; + + checkMachOComdat(GV); + + std::string ErrorCode = + MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, + TAA, TAAParsed, StubSize); + if (!ErrorCode.empty()) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getName() + + "' has an invalid section specifier '" + + GV->getSection() + "': " + ErrorCode + "."); + } + + // Get the section. 
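+ // Mach-O sections are uniqued, so this may return a section created for an
+ // earlier global; the checks below reject mismatched type or attributes.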
+ MCSectionMachO *S = + getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + + // If TAA wasn't set by ParseSectionSpecifier() above, + // use the value returned by getMachOSection() as a default. + if (!TAAParsed) + TAA = S->getTypeAndAttributes(); + + // Okay, now that we got the section, verify that the TAA & StubSize agree. + // If the user declared multiple globals with different section flags, we need + // to reject it here. + if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) { + // If invalid, report the error with report_fatal_error. + report_fatal_error("Global variable '" + GV->getName() + + "' section type or attributes does not match previous" + " section specifier"); + } + + return S; +} + +MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + checkMachOComdat(GV); + + // Handle thread local data. + if (Kind.isThreadBSS()) return TLSBSSSection; + if (Kind.isThreadData()) return TLSDataSection; + + if (Kind.isText()) + return GV->isWeakForLinker() ? TextCoalSection : TextSection; + + // If this is weak/linkonce, put this in a coalescable section, either in text + // or data depending on if it is writable. + if (GV->isWeakForLinker()) { + if (Kind.isReadOnly()) + return ConstTextCoalSection; + return DataCoalSection; + } + + // FIXME: Alignment check should be handled by section classifier. + if (Kind.isMergeable1ByteCString() && + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) + return CStringSection; + + // Do not put 16-bit arrays in the UString section if they have an + // externally visible label, this runs into issues with certain linker + // versions. + if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() && + GV->getParent()->getDataLayout().getPreferredAlignment( + cast<GlobalVariable>(GV)) < 32) + return UStringSection; + + // With MachO only variables whose corresponding symbol starts with 'l' or + // 'L' can be merged, so we only try merging GVs with private linkage. + if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) { + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16()) + return SixteenByteConstantSection; + } + + // Otherwise, if it is readonly, but not something we can specially optimize, + // just drop it in .const. + if (Kind.isReadOnly()) + return ReadOnlySection; + + // If this is marked const, put it into a const section. But if the dynamic + // linker needs to write to it, put it in the data segment. + if (Kind.isReadOnlyWithRel()) + return ConstDataSection; + + // Put zero initialized globals with strong external linkage in the + // DATA, __common section with the .zerofill directive. + if (Kind.isBSSExtern()) + return DataCommonSection; + + // Put zero initialized globals with local linkage in __DATA,__bss directive + // with the .zerofill directive (aka .lcomm). + if (Kind.isBSSLocal()) + return DataBSSSection; + + // Otherwise, just drop the variable in the normal data section. + return DataSection; +} + +MCSection *TargetLoweringObjectFileMachO::getSectionForConstant( + const DataLayout &DL, SectionKind Kind, const Constant *C) const { + // If this constant requires a relocation, we have to put it in the data + // segment, not in the text segment. 
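+ // Such constants go to __DATA,__const; everything else gets a literal
+ // section sized to the value so the linker can coalesce duplicates, or
+ // falls back to __TEXT,__const.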
+ if (Kind.isData() || Kind.isReadOnlyWithRel()) + return ConstDataSection; + + if (Kind.isMergeableConst4()) + return FourByteConstantSection; + if (Kind.isMergeableConst8()) + return EightByteConstantSection; + if (Kind.isMergeableConst16()) + return SixteenByteConstantSection; + return ReadOnlySection; // .const +} + +const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference( + const GlobalValue *GV, unsigned Encoding, Mangler &Mang, + const TargetMachine &TM, MachineModuleInfo *MMI, + MCStreamer &Streamer) const { + // The mach-o version of this method defaults to returning a stub reference. + + if (Encoding & DW_EH_PE_indirect) { + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + MCSymbol *SSym = + getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : + MachOMMI.getGVStubEntry(SSym); + if (!StubSym.getPointer()) { + MCSymbol *Sym = TM.getSymbol(GV, Mang); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getTTypeReference(MCSymbolRefExpr::create(SSym, getContext()), + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang, + TM, MMI, Streamer); +} + +MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol( + const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + + MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM); + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (!StubSym.getPointer()) { + MCSymbol *Sym = TM.getSymbol(GV, Mang); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return SSym; +} + +const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel( + const MCSymbol *Sym, const MCValue &MV, int64_t Offset, + MachineModuleInfo *MMI, MCStreamer &Streamer) const { + // Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation + // as 64-bit do, we replace the GOT equivalent by accessing the final symbol + // through a non_lazy_ptr stub instead. One advantage is that it allows the + // computation of deltas to final external symbols. Example: + // + // _extgotequiv: + // .long _extfoo + // + // _delta: + // .long _extgotequiv-_delta + // + // is transformed to: + // + // _delta: + // .long L_extfoo$non_lazy_ptr-(_delta+0) + // + // .section __IMPORT,__pointers,non_lazy_symbol_pointers + // L_extfoo$non_lazy_ptr: + // .indirect_symbol _extfoo + // .long 0 + // + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MCContext &Ctx = getContext(); + + // The offset must consider the original displacement from the base symbol + // since 32-bit targets don't have a GOTPCREL to fold the PC displacement. 
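+ // Note that the incoming Offset argument is recomputed below from the
+ // MCValue's constant term rather than used directly.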
+ Offset = -MV.getConstant(); + const MCSymbol *BaseSym = &MV.getSymB()->getSymbol(); + + // Access the final symbol via sym$non_lazy_ptr and generate the appropriated + // non_lazy_ptr stubs. + SmallString<128> Name; + StringRef Suffix = "$non_lazy_ptr"; + Name += MMI->getModule()->getDataLayout().getPrivateGlobalPrefix(); + Name += Sym->getName(); + Name += Suffix; + MCSymbol *Stub = Ctx.getOrCreateSymbol(Name); + + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl:: + StubValueTy(const_cast<MCSymbol *>(Sym), true /* access indirectly */); + + const MCExpr *BSymExpr = + MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx); + const MCExpr *LHS = + MCSymbolRefExpr::create(Stub, MCSymbolRefExpr::VK_None, Ctx); + + if (!Offset) + return MCBinaryExpr::createSub(LHS, BSymExpr, Ctx); + + const MCExpr *RHS = + MCBinaryExpr::createAdd(BSymExpr, MCConstantExpr::create(Offset, Ctx), Ctx); + return MCBinaryExpr::createSub(LHS, RHS, Ctx); +} + +static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo, + const MCSection &Section) { + if (!AsmInfo.isSectionAtomizableBySymbols(Section)) + return true; + + // If it is not dead stripped, it is safe to use private labels. + const MCSectionMachO &SMO = cast<MCSectionMachO>(Section); + if (SMO.hasAttribute(MachO::S_ATTR_NO_DEAD_STRIP)) + return true; + + return false; +} + +void TargetLoweringObjectFileMachO::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM); + bool CannotUsePrivateLabel = + !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection); + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + +//===----------------------------------------------------------------------===// +// COFF +//===----------------------------------------------------------------------===// + +static unsigned +getCOFFSectionFlags(SectionKind K) { + unsigned Flags = 0; + + if (K.isMetadata()) + Flags |= + COFF::IMAGE_SCN_MEM_DISCARDABLE; + else if (K.isText()) + Flags |= + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_CNT_CODE; + else if (K.isBSS()) + Flags |= + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isThreadLocal()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + else if (K.isReadOnly() || K.isReadOnlyWithRel()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; + else if (K.isWriteable()) + Flags |= + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + return Flags; +} + +static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + assert(C && "expected GV to have a Comdat!"); + + StringRef ComdatGVName = C->getName(); + const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName); + if (!ComdatGV) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' does not exist."); + + if (ComdatGV->getComdat() != C) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' is not a key for its COMDAT."); + + return ComdatGV; +} + +static int getSelectionForCOFF(const GlobalValue *GV) { + if (const Comdat *C = GV->getComdat()) { 
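+ // Only the COMDAT key symbol carries the group's selection kind; every
+ // other member is emitted as IMAGE_COMDAT_SELECT_ASSOCIATIVE.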
+ const GlobalValue *ComdatKey = getComdatGVForCOFF(GV); + if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey)) + ComdatKey = GA->getBaseObject(); + if (ComdatKey == GV) { + switch (C->getSelectionKind()) { + case Comdat::Any: + return COFF::IMAGE_COMDAT_SELECT_ANY; + case Comdat::ExactMatch: + return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH; + case Comdat::Largest: + return COFF::IMAGE_COMDAT_SELECT_LARGEST; + case Comdat::NoDuplicates: + return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + case Comdat::SameSize: + return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE; + } + } else { + return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE; + } + } + return 0; +} + +MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + int Selection = 0; + unsigned Characteristics = getCOFFSectionFlags(Kind); + StringRef Name = GV->getSection(); + StringRef COMDATSymName = ""; + if (GV->hasComdat()) { + Selection = getSelectionForCOFF(GV); + const GlobalValue *ComdatGV; + if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + COMDATSymName = Sym->getName(); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + } else { + Selection = 0; + } + } + return getContext().getCOFFSection(Name, + Characteristics, + Kind, + COMDATSymName, + Selection); +} + +static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadLocal()) + return ".tls$"; + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) + return ".rdata"; + return ".data"; +} + +MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal( + const GlobalValue *GV, SectionKind Kind, Mangler &Mang, + const TargetMachine &TM) const { + // If we have -ffunction-sections then we should emit the global value to a + // uniqued section specifically for it. + bool EmitUniquedSection; + if (Kind.isText()) + EmitUniquedSection = TM.getFunctionSections(); + else + EmitUniquedSection = TM.getDataSections(); + + if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) { + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind); + + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + int Selection = getSelectionForCOFF(GV); + if (!Selection) + Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + const GlobalValue *ComdatGV; + if (GV->hasComdat()) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + StringRef COMDATSymName = Sym->getName(); + return getContext().getCOFFSection(Name, Characteristics, Kind, + COMDATSymName, Selection); + } else { + SmallString<256> TmpData; + Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true); + return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData, + Selection); + } + } + + if (Kind.isText()) + return TextSection; + + if (Kind.isThreadLocal()) + return TLSDataSection; + + if (Kind.isReadOnly() || Kind.isReadOnlyWithRel()) + return ReadOnlySection; + + // Note: we claim that common symbols are put in BSSSection, but they are + // really emitted with the magic .comm directive, which creates a symbol table + // entry but not a section. 
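+ // Returning BSSSection for common symbols here is therefore only for the
+ // benefit of callers that need a section kind, not a real placement.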
+ if (Kind.isBSS() || Kind.isCommon()) + return BSSSection; + + return DataSection; +} + +void TargetLoweringObjectFileCOFF::getNameWithPrefix( + SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang, + const TargetMachine &TM) const { + bool CannotUsePrivateLabel = false; + if (GV->hasPrivateLinkage() && + ((isa<Function>(GV) && TM.getFunctionSections()) || + (isa<GlobalVariable>(GV) && TM.getDataSections()))) + CannotUsePrivateLabel = true; + + Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel); +} + +MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable( + const Function &F, Mangler &Mang, const TargetMachine &TM) const { + // If the function can be removed, produce a unique section so that + // the table doesn't prevent the removal. + const Comdat *C = F.getComdat(); + bool EmitUniqueSection = TM.getFunctionSections() || C; + if (!EmitUniqueSection) + return ReadOnlySection; + + // FIXME: we should produce a symbol for F instead. + if (F.hasPrivateLinkage()) + return ReadOnlySection; + + MCSymbol *Sym = TM.getSymbol(&F, Mang); + StringRef COMDATSymName = Sym->getName(); + + SectionKind Kind = SectionKind::getReadOnly(); + const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); + unsigned Characteristics = getCOFFSectionFlags(Kind); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + + return getContext().getCOFFSection(Name, Characteristics, Kind, COMDATSymName, + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); +} + +void TargetLoweringObjectFileCOFF:: +emitModuleFlags(MCStreamer &Streamer, + ArrayRef<Module::ModuleFlagEntry> ModuleFlags, + Mangler &Mang, const TargetMachine &TM) const { + MDNode *LinkerOptions = nullptr; + + // Look for the "Linker Options" flag, since it's the only one we support. + for (ArrayRef<Module::ModuleFlagEntry>::iterator + i = ModuleFlags.begin(), e = ModuleFlags.end(); i != e; ++i) { + const Module::ModuleFlagEntry &MFE = *i; + StringRef Key = MFE.Key->getString(); + Metadata *Val = MFE.Val; + if (Key == "Linker Options") { + LinkerOptions = cast<MDNode>(Val); + break; + } + } + if (!LinkerOptions) + return; + + // Emit the linker options to the linker .drectve section. According to the + // spec, this section is a space-separated string containing flags for linker. + MCSection *Sec = getDrectveSection(); + Streamer.SwitchSection(Sec); + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast<MDString>(MDOptions->getOperand(ii)); + // Lead with a space for consistency with our dllexport implementation. 
+ std::string Directive(" "); + Directive.append(MDOption->getString()); + Streamer.EmitBytes(Directive); + } + } +} + +MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getContext().getAssociativeCOFFSection( + cast<MCSectionCOFF>(StaticCtorSection), KeySym); +} + +MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( + unsigned Priority, const MCSymbol *KeySym) const { + return getContext().getAssociativeCOFFSection( + cast<MCSectionCOFF>(StaticDtorSection), KeySym); +} + +void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal( + raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const { + if (!GV->hasDLLExportStorageClass() || GV->isDeclaration()) + return; + + const Triple &TT = getTargetTriple(); + + if (TT.isKnownWindowsMSVCEnvironment()) + OS << " /EXPORT:"; + else + OS << " -export:"; + + if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) { + std::string Flag; + raw_string_ostream FlagOS(Flag); + Mang.getNameWithPrefix(FlagOS, GV, false); + FlagOS.flush(); + if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix()) + OS << Flag.substr(1); + else + OS << Flag; + } else { + Mang.getNameWithPrefix(OS, GV, false); + } + + if (!GV->getValueType()->isFunctionTy()) { + if (TT.isKnownWindowsMSVCEnvironment()) + OS << ",DATA"; + else + OS << ",data"; + } +} diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp new file mode 100644 index 0000000..8d2048f --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp @@ -0,0 +1,49 @@ +//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the methods in the TargetOptions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +/// DisableFramePointerElim - This returns true if frame pointer elimination +/// optimization should be disabled for the given machine function. +bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { + // Check to see if we should eliminate all frame pointers. + if (MF.getSubtarget().getFrameLowering()->noFramePointerElim(MF)) + return true; + + // Check to see if we should eliminate non-leaf frame pointers. + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf")) + return MF.getFrameInfo()->hasCalls(); + + return false; +} + +/// LessPreciseFPMAD - This flag return true when -enable-fp-mad option +/// is specified on the command line. When this flag is off(default), the +/// code generator is not allowed to generate mad (multiply add) if the +/// result is "less precise" than doing those operations individually. +bool TargetOptions::LessPreciseFPMAD() const { + return UnsafeFPMath || LessPreciseFPMADOption; +} + +/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume +/// that the rounding mode of the FPU can change from its default. 
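+/// Only meaningful when UnsafeFPMath is disabled; unsafe-math mode always
+/// assumes the default rounding mode.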
+bool TargetOptions::HonorSignDependentRoundingFPMath() const { + return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; +} diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp new file mode 100644 index 0000000..0a7042a --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -0,0 +1,397 @@ +//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetRegisterInfo interface. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" + +#define DEBUG_TYPE "target-reg-info" + +using namespace llvm; + +TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, + regclass_iterator RCB, regclass_iterator RCE, + const char *const *SRINames, + const unsigned *SRILaneMasks, + unsigned SRICoveringLanes) + : InfoDesc(ID), SubRegIndexNames(SRINames), + SubRegIndexLaneMasks(SRILaneMasks), + RegClassBegin(RCB), RegClassEnd(RCE), + CoveringLanes(SRICoveringLanes) { +} + +TargetRegisterInfo::~TargetRegisterInfo() {} + +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); + else + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); +} + +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } + + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } + + // Normal units have at least one root. 
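+ // Print the unit as its root register names joined by '~'; a unit can have
+ // more than one root when registers alias without a common sub-register.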
+ MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); +} + +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); +} + +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); +} + +} // End of llvm namespace + +/// getAllocatableClass - Return the maximal subclass of the given register +/// class that is alloctable, or NULL. +const TargetRegisterClass * +TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { + if (!RC || RC->isAllocatable()) + return RC; + + const unsigned *SubClass = RC->getSubClassMask(); + for (unsigned Base = 0, BaseE = getNumRegClasses(); + Base < BaseE; Base += 32) { + unsigned Idx = Base; + for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) { + unsigned Offset = countTrailingZeros(Mask); + const TargetRegisterClass *SubRC = getRegClass(Idx + Offset); + if (SubRC->isAllocatable()) + return SubRC; + Mask >>= Offset; + Idx += Offset + 1; + } + } + return nullptr; +} + +/// getMinimalPhysRegClass - Returns the Register Class of a physical +/// register of the given type, picking the most sub register class of +/// the right type that contains this physreg. +const TargetRegisterClass * +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const { + assert(isPhysicalRegister(reg) && "reg must be a physical register"); + + // Pick the most sub register class of the right type that contains + // this physreg. + const TargetRegisterClass* BestRC = nullptr; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ + const TargetRegisterClass* RC = *I; + if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && + (!BestRC || BestRC->hasSubClass(RC))) + BestRC = RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + +/// getAllocatableSetForRC - Toggle the bits that represent allocatable +/// registers for the specific register class. +static void getAllocatableSetForRC(const MachineFunction &MF, + const TargetRegisterClass *RC, BitVector &R){ + assert(RC->isAllocatable() && "invalid for nonallocatable sets"); + ArrayRef<MCPhysReg> Order = RC->getRawAllocationOrder(MF); + for (unsigned i = 0; i != Order.size(); ++i) + R.set(Order[i]); +} + +BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC) const { + BitVector Allocatable(getNumRegs()); + if (RC) { + // A register class with no allocatable subclass returns an empty set. 
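+ // (getAllocatableClass returns null in that case, leaving Allocatable
+ // empty.)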
+ const TargetRegisterClass *SubClass = getAllocatableClass(RC); + if (SubClass) + getAllocatableSetForRC(MF, SubClass, Allocatable); + } else { + for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), + E = regclass_end(); I != E; ++I) + if ((*I)->isAllocatable()) + getAllocatableSetForRC(MF, *I, Allocatable); + } + + // Mask out the reserved registers + BitVector Reserved = getReservedRegs(MF); + Allocatable &= Reserved.flip(); + + return Allocatable; +} + +static inline +const TargetRegisterClass *firstCommonClass(const uint32_t *A, + const uint32_t *B, + const TargetRegisterInfo *TRI, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) { + const MVT VT(SVT); + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } + return nullptr; +} + +const TargetRegisterClass * +TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { + // First take care of the trivial cases. + if (A == B) + return A; + if (!A || !B) + return nullptr; + + // Register classes are ordered topologically, so the largest common + // sub-class it the common sub-class with the smallest ID. + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); +} + +const TargetRegisterClass * +TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned Idx) const { + assert(A && B && "Missing register class"); + assert(Idx && "Bad sub-register index"); + + // Find Idx in the list of super-register indices. + for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI) + if (RCI.getSubReg() == Idx) + // The bit mask contains all register classes that are projected into B + // by Idx. Find a class that is also a sub-class of A. + return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this); + return nullptr; +} + +const TargetRegisterClass *TargetRegisterInfo:: +getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const { + assert(RCA && SubA && RCB && SubB && "Invalid arguments"); + + // Search all pairs of sub-register indices that project into RCA and RCB + // respectively. This is quadratic, but usually the sets are very small. On + // most targets like X86, there will only be a single sub-register index + // (e.g., sub_16bit projecting into GR16). + // + // The worst case is a register class like DPR on ARM. + // We have indices dsub_0..dsub_7 projecting into that class. + // + // It is very common that one register class is a sub-register of the other. + // Arrange for RCA to be the larger register so the answer will be found in + // the first iteration. This makes the search linear for the most common + // case. + const TargetRegisterClass *BestRC = nullptr; + unsigned *BestPreA = &PreA; + unsigned *BestPreB = &PreB; + if (RCA->getSize() < RCB->getSize()) { + std::swap(RCA, RCB); + std::swap(SubA, SubB); + std::swap(BestPreA, BestPreB); + } + + // Also terminate the search one we have found a register class as small as + // RCA. 
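+ // (No common super-register class can be smaller than RCA itself, so a
+ // candidate of size MinSize is already optimal.)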
+ unsigned MinSize = RCA->getSize(); + + for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { + unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); + for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) { + // Check if a common super-register class exists for this index pair. + const TargetRegisterClass *RC = + firstCommonClass(IA.getMask(), IB.getMask(), this); + if (!RC || RC->getSize() < MinSize) + continue; + + // The indexes must compose identically: PreA+SubA == PreB+SubB. + unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB); + if (FinalA != FinalB) + continue; + + // Is RC a better candidate than BestRC? + if (BestRC && RC->getSize() >= BestRC->getSize()) + continue; + + // Yes, RC is the smallest super-register seen so far. + BestRC = RC; + *BestPreA = IA.getSubReg(); + *BestPreB = IB.getSubReg(); + + // Bail early if we reached MinSize. We won't find a better candidate. + if (BestRC->getSize() == MinSize) + return BestRC; + } + } + return BestRC; +} + +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) { + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != nullptr; + } + + // At most one of the register is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the register is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; + + // Plain copy. + return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; +} + +bool TargetRegisterInfo::shouldRewriteCopySrc(const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) const { + // If this source does not incur a cross register bank copy, use it. + return shareSameRegisterFile(*this, DefRC, DefSubReg, SrcRC, SrcSubReg); +} + +// Compute target-independent register allocator hints to help eliminate copies. +void +TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, + ArrayRef<MCPhysReg> Order, + SmallVectorImpl<MCPhysReg> &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM, + const LiveRegMatrix *Matrix) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + std::pair<unsigned, unsigned> Hint = MRI.getRegAllocationHint(VirtReg); + + // Hints with HintType != 0 were set by target-dependent code. + // Such targets must provide their own implementation of + // TRI::getRegAllocationHints to interpret those hint types. + assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints"); + + // Target-independent hints are either a physical or a virtual register. + unsigned Phys = Hint.second; + if (VRM && isVirtualRegister(Phys)) + Phys = VRM->getPhys(Phys); + + // Check that Phys is a valid hint in VirtReg's register class. + if (!isPhysicalRegister(Phys)) + return; + if (MRI.isReserved(Phys)) + return; + // Check that Phys is in the allocation order. 
We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (std::find(Order.begin(), Order.end(), Phys) == Order.end()) + return; + + // All clear, tell the register allocator to prefer this register. + Hints.push_back(Phys); +} + +bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const { + return !MF.getFunction()->hasFnAttribute("no-realign-stack"); +} + +bool TargetRegisterInfo::needsStackRealignment( + const MachineFunction &MF) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); + const Function *F = MF.getFunction(); + unsigned StackAlign = TFI->getStackAlignment(); + bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || + F->hasFnAttribute(Attribute::StackAlignment)); + if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) { + if (canRealignStack(MF)) + return true; + DEBUG(dbgs() << "Can't realign function's stack: " << F->getName() << "\n"); + } + return false; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void +TargetRegisterInfo::dumpReg(unsigned Reg, unsigned SubRegIndex, + const TargetRegisterInfo *TRI) { + dbgs() << PrintReg(Reg, TRI, SubRegIndex) << "\n"; +} +#endif diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp new file mode 100644 index 0000000..1c4558c --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp @@ -0,0 +1,301 @@ +//===-- llvm/Target/TargetSchedule.cpp - Sched Machine Model ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a wrapper around MCSchedModel that allows the interface +// to benefit from information currently only available in TargetInstrInfo. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + +using namespace llvm; + +static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true), + cl::desc("Use TargetSchedModel for latency lookup")); + +static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true), + cl::desc("Use InstrItineraryData for latency lookup")); + +bool TargetSchedModel::hasInstrSchedModel() const { + return EnableSchedModel && SchedModel.hasInstrSchedModel(); +} + +bool TargetSchedModel::hasInstrItineraries() const { + return EnableSchedItins && !InstrItins.isEmpty(); +} + +static unsigned gcd(unsigned Dividend, unsigned Divisor) { + // Dividend and Divisor will be naturally swapped as needed. 
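+ // Standard Euclidean algorithm: the remainder strictly decreases each
+ // iteration, so the loop terminates.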
+ while(Divisor) { + unsigned Rem = Dividend % Divisor; + Dividend = Divisor; + Divisor = Rem; + }; + return Dividend; +} +static unsigned lcm(unsigned A, unsigned B) { + unsigned LCM = (uint64_t(A) * B) / gcd(A, B); + assert((LCM >= A && LCM >= B) && "LCM overflow"); + return LCM; +} + +void TargetSchedModel::init(const MCSchedModel &sm, + const TargetSubtargetInfo *sti, + const TargetInstrInfo *tii) { + SchedModel = sm; + STI = sti; + TII = tii; + STI->initInstrItins(InstrItins); + + unsigned NumRes = SchedModel.getNumProcResourceKinds(); + ResourceFactors.resize(NumRes); + ResourceLCM = SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + if (NumUnits > 0) + ResourceLCM = lcm(ResourceLCM, NumUnits); + } + MicroOpFactor = ResourceLCM / SchedModel.IssueWidth; + for (unsigned Idx = 0; Idx < NumRes; ++Idx) { + unsigned NumUnits = SchedModel.getProcResource(Idx)->NumUnits; + ResourceFactors[Idx] = NumUnits ? (ResourceLCM / NumUnits) : 0; + } +} + +unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI, + const MCSchedClassDesc *SC) const { + if (hasInstrItineraries()) { + int UOps = InstrItins.getNumMicroOps(MI->getDesc().getSchedClass()); + return (UOps >= 0) ? UOps : TII->getNumMicroOps(&InstrItins, MI); + } + if (hasInstrSchedModel()) { + if (!SC) + SC = resolveSchedClass(MI); + if (SC->isValid()) + return SC->NumMicroOps; + } + return MI->isTransient() ? 0 : 1; +} + +// The machine model may explicitly specify an invalid latency, which +// effectively means infinite latency. Since users of the TargetSchedule API +// don't know how to handle this, we convert it to a very large latency that is +// easy to distinguish when debugging the DAG but won't induce overflow. +static unsigned capLatency(int Cycles) { + return Cycles >= 0 ? Cycles : 1000; +} + +/// Return the MCSchedClassDesc for this instruction. Some SchedClasses require +/// evaluation of predicates that depend on instruction operands or flags. +const MCSchedClassDesc *TargetSchedModel:: +resolveSchedClass(const MachineInstr *MI) const { + + // Get the definition's scheduling class descriptor from this machine model. + unsigned SchedClass = MI->getDesc().getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SchedClass); + if (!SCDesc->isValid()) + return SCDesc; + +#ifndef NDEBUG + unsigned NIter = 0; +#endif + while (SCDesc->isVariant()) { + assert(++NIter < 6 && "Variants are nested deeper than the magic number"); + + SchedClass = STI->resolveSchedClass(SchedClass, MI, this); + SCDesc = SchedModel.getSchedClassDesc(SchedClass); + } + return SCDesc; +} + +/// Find the def index of this operand. This index maps to the machine model and +/// is independent of use operands. Def operands may be reordered with uses or +/// merged with uses without affecting the def index (e.g. before/after +/// regalloc). However, an instruction's def operands must never be reordered +/// with respect to each other. +static unsigned findDefIdx(const MachineInstr *MI, unsigned DefOperIdx) { + unsigned DefIdx = 0; + for (unsigned i = 0; i != DefOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isDef()) + ++DefIdx; + } + return DefIdx; +} + +/// Find the use index of this operand. This is independent of the instruction's +/// def operands. +/// +/// Note that uses are not determined by the operand's isUse property, which +/// is simply the inverse of isDef. 
Here we consider any readsReg operand to be +/// a "use". The machine model allows an operand to be both a Def and Use. +static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) { + unsigned UseIdx = 0; + for (unsigned i = 0; i != UseOperIdx; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.readsReg()) + ++UseIdx; + } + return UseIdx; +} + +// Top-level API for clients that know the operand indices. +unsigned TargetSchedModel::computeOperandLatency( + const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *UseMI, unsigned UseOperIdx) const { + + if (!hasInstrSchedModel() && !hasInstrItineraries()) + return TII->defaultDefLatency(SchedModel, DefMI); + + if (hasInstrItineraries()) { + int OperLatency = 0; + if (UseMI) { + OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, + UseMI, UseOperIdx); + } + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = InstrItins.getOperandCycle(DefClass, DefOperIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = TII->getInstrLatency(&InstrItins, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + // Rather than directly querying InstrItins stage latency, we call a TII + // hook to allow subtargets to specialize latency. This hook is only + // applicable to the InstrItins model. InstrSchedModel should model all + // special cases without TII hooks. + InstrLatency = std::max(InstrLatency, + TII->defaultDefLatency(SchedModel, DefMI)); + return InstrLatency; + } + // hasInstrSchedModel() + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + unsigned DefIdx = findDefIdx(DefMI, DefOperIdx); + if (DefIdx < SCDesc->NumWriteLatencyEntries) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(SCDesc, DefIdx); + unsigned WriteID = WLEntry->WriteResourceID; + unsigned Latency = capLatency(WLEntry->Cycles); + if (!UseMI) + return Latency; + + // Lookup the use's latency adjustment in SubtargetInfo. + const MCSchedClassDesc *UseDesc = resolveSchedClass(UseMI); + if (UseDesc->NumReadAdvanceEntries == 0) + return Latency; + unsigned UseIdx = findUseIdx(UseMI, UseOperIdx); + int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID); + if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap + return 0; + return Latency - Advance; + } + // If DefIdx does not exist in the model (e.g. implicit defs), then return + // unit latency (defaultDefLatency may be too conservative). +#ifndef NDEBUG + if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit() + && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef() + && SchedModel.isComplete()) { + errs() << "DefIdx " << DefIdx << " exceeds machine model writes for " + << *DefMI << " (Try with MCSchedModel.CompleteModel set to false)"; + llvm_unreachable("incomplete machine model"); + } +#endif + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 
0 : TII->defaultDefLatency(SchedModel, DefMI); +} + +unsigned +TargetSchedModel::computeInstrLatency(const MCSchedClassDesc &SCDesc) const { + unsigned Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc.NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. + const MCWriteLatencyEntry *WLEntry = + STI->getWriteLatencyEntry(&SCDesc, DefIdx); + Latency = std::max(Latency, capLatency(WLEntry->Cycles)); + } + return Latency; +} + +unsigned TargetSchedModel::computeInstrLatency(unsigned Opcode) const { + assert(hasInstrSchedModel() && "Only call this function with a SchedModel"); + + unsigned SCIdx = TII->get(Opcode).getSchedClass(); + const MCSchedClassDesc *SCDesc = SchedModel.getSchedClassDesc(SCIdx); + + if (SCDesc->isValid() && !SCDesc->isVariant()) + return computeInstrLatency(*SCDesc); + + llvm_unreachable("No MI sched latency"); +} + +unsigned +TargetSchedModel::computeInstrLatency(const MachineInstr *MI, + bool UseDefaultDefLatency) const { + // For the itinerary model, fall back to the old subtarget hook. + // Allow subtargets to compute Bundle latencies outside the machine model. + if (hasInstrItineraries() || MI->isBundle() || + (!hasInstrSchedModel() && !UseDefaultDefLatency)) + return TII->getInstrLatency(&InstrItins, MI); + + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(MI); + if (SCDesc->isValid()) + return computeInstrLatency(*SCDesc); + } + return TII->defaultDefLatency(SchedModel, MI); +} + +unsigned TargetSchedModel:: +computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, + const MachineInstr *DepMI) const { + if (SchedModel.MicroOpBufferSize <= 1) + return 1; + + // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch + // WAW dependencies in the same cycle. + + // Treat predication as a data dependency for out-of-order cpus. In-order + // cpus do not need to treat predicated writes specially. + // + // TODO: The following hack exists because predication passes do not + // correctly append imp-use operands, and readsReg() strangely returns false + // for predicated defs. + unsigned Reg = DefMI->getOperand(DefOperIdx).getReg(); + const MachineFunction &MF = *DefMI->getParent()->getParent(); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(DepMI)) + return computeInstrLatency(DefMI); + + // If we have a per operand scheduling model, check if this def is writing + // an unbuffered resource. If so, it treated like an in-order cpu. + if (hasInstrSchedModel()) { + const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI); + if (SCDesc->isValid()) { + for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc), + *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) { + if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize) + return 1; + } + } + } + return 0; +} diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp new file mode 100644 index 0000000..c6bae24 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -0,0 +1,1811 @@ +//===-- TwoAddressInstructionPass.cpp - Two-Address instruction pass ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file implements the TwoAddress instruction pass which is used +// by most register allocators. Two-Address instructions are rewritten +// from: +// +// A = B op C +// +// to: +// +// A = B +// A op= C +// +// Note that if a register allocator chooses to use this pass, that it +// has to be capable of handling the non-SSA nature of these rewritten +// virtual registers. +// +// It is also worth noting that the duplicate operand of the two +// address instruction is removed. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +using namespace llvm; + +#define DEBUG_TYPE "twoaddrinstr" + +STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); +STATISTIC(NumCommuted , "Number of instructions commuted to coalesce"); +STATISTIC(NumAggrCommuted , "Number of instructions aggressively commuted"); +STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); +STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); + +// Temporary flag to disable rescheduling. +static cl::opt<bool> +EnableRescheduling("twoaddr-reschedule", + cl::desc("Coalesce copies by rescheduling (default=true)"), + cl::init(true), cl::Hidden); + +namespace { +class TwoAddressInstructionPass : public MachineFunctionPass { + MachineFunction *MF; + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; + MachineRegisterInfo *MRI; + LiveVariables *LV; + LiveIntervals *LIS; + AliasAnalysis *AA; + CodeGenOpt::Level OptLevel; + + // The current basic block being processed. + MachineBasicBlock *MBB; + + // Keep track the distance of a MI from the start of the current basic block. + DenseMap<MachineInstr*, unsigned> DistanceMap; + + // Set of already processed instructions in the current block. + SmallPtrSet<MachineInstr*, 8> Processed; + + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies from physical registers to virtual + // registers. e.g. v1024 = move r0. + DenseMap<unsigned, unsigned> SrcRegMap; + + // A map from virtual registers to physical registers which are likely targets + // to be coalesced to due to copies to physical registers from virtual + // registers. e.g. r1 = move v1024. 
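+ // Like SrcRegMap, this map is only a heuristic hint; a missing entry never
+ // affects correctness, only the quality of the coalescing decisions.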
+ DenseMap<unsigned, unsigned> DstRegMap; + + bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, + MachineBasicBlock::iterator OldPos); + + bool isRevCopyChain(unsigned FromReg, unsigned ToReg, int Maxlen); + + bool noUseAfterLastDef(unsigned Reg, unsigned Dist, unsigned &LastDef); + + bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist); + + bool commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, unsigned RegCIdx, unsigned Dist); + + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); + + bool convertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, unsigned Dist); + + bool isDefTooClose(unsigned Reg, unsigned Dist, MachineInstr *MI); + + bool rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + + bool tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute); + + bool tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist); + void scanUses(unsigned DstReg); + + void processCopy(MachineInstr *MI); + + typedef SmallVector<std::pair<unsigned, unsigned>, 4> TiedPairList; + typedef SmallDenseMap<unsigned, TiedPairList> TiedOperandMap; + bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); + void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); + void eliminateRegSequence(MachineBasicBlock::iterator&); + +public: + static char ID; // Pass identification, replacement for typeid + TwoAddressInstructionPass() : MachineFunctionPass(ID) { + initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + AU.addRequired<AAResultsWrapperPass>(); + AU.addPreserved<LiveVariables>(); + AU.addPreserved<SlotIndexes>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreservedID(MachineLoopInfoID); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + /// Pass entry point. + bool runOnMachineFunction(MachineFunction&) override; +}; +} // end anonymous namespace + +char TwoAddressInstructionPass::ID = 0; +INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", + "Two-Address instruction pass", false, false) + +char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; + +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); + +/// A two-address instruction has been converted to a three-address instruction +/// to avoid clobbering a register. Try to sink it past the instruction that +/// would kill the above mentioned register to reduce register pressure. +bool TwoAddressInstructionPass:: +sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, + MachineBasicBlock::iterator OldPos) { + // FIXME: Shouldn't we be trying to do this before we three-addressify the + // instruction? After this transformation is done, we no longer need + // the instruction to be in three-address form. + + // Check if it's safe to move this instruction. 
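+ // Starting with SeenStore = true makes isSafeToMove() conservative: any
+ // instruction that may load is rejected as well.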
+ bool SeenStore = true; // Be conservative. + if (!MI->isSafeToMove(AA, SeenStore)) + return false; + + unsigned DefReg = 0; + SmallSet<unsigned, 4> UseRegs; + + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse() && MOReg != SavedReg) + UseRegs.insert(MO.getReg()); + if (!MO.isDef()) + continue; + if (MO.isImplicit()) + // Don't try to move it if it implicitly defines a register. + return false; + if (DefReg) + // For now, don't move any instructions that define multiple registers. + return false; + DefReg = MO.getReg(); + } + + // Find the instruction that kills SavedReg. + MachineInstr *KillMI = nullptr; + if (LIS) { + LiveInterval &LI = LIS->getInterval(SavedReg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } + if (!KillMI) { + for (MachineOperand &UseMO : MRI->use_nodbg_operands(SavedReg)) { + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } + } + + // If we find the instruction that kills SavedReg, and it is in an + // appropriate location, we can try to sink the current instruction + // past it. + if (!KillMI || KillMI->getParent() != MBB || KillMI == MI || + KillMI == OldPos || KillMI->isTerminator()) + return false; + + // If any of the definitions are used by another instruction between the + // position and the kill use, then it's not safe to sink it. + // + // FIXME: This can be sped up if there is an easy way to query whether an + // instruction is before or after another instruction. Then we can use + // MachineRegisterInfo def / use instead. + MachineOperand *KillMO = nullptr; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + + unsigned NumVisited = 0; + for (MachineBasicBlock::iterator I = std::next(OldPos); I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (DefReg == MOReg) + return false; + + if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { + if (OtherMI == KillMI && MOReg == SavedReg) + // Save the operand that kills the register. We want to unset the kill + // marker if we can sink MI past it. + KillMO = &MO; + else if (UseRegs.count(MOReg)) + // One of the uses is killed before the destination. + return false; + } + } + } + assert(KillMO && "Didn't find kill"); + + if (!LIS) { + // Update kill and LV information. + KillMO->setIsKill(false); + KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); + KillMO->setIsKill(true); + + if (LV) + LV->replaceKillInstruction(SavedReg, KillMI, MI); + } + + // Move instruction to its destination. + MBB->remove(MI); + MBB->insert(KillPos, MI); + + if (LIS) + LIS->handleMove(MI); + + ++Num3AddrSunk; + return true; +} + +/// Return the MachineInstr* if it is the single def of the Reg in current BB. 
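+/// For example, if the only def of %reg1024 inside this block is
+///   %reg1024 = COPY %reg1023
+/// that COPY is returned. If the register has several defs in this block, or
+/// its only defs are in other blocks, nullptr is returned.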
+static MachineInstr *getSingleDef(unsigned Reg, MachineBasicBlock *BB, + const MachineRegisterInfo *MRI) { + MachineInstr *Ret = nullptr; + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != BB || DefMI.isDebugValue()) + continue; + if (!Ret) + Ret = &DefMI; + else if (Ret != &DefMI) + return nullptr; + } + return Ret; +} + +/// Check if there is a reversed copy chain from FromReg to ToReg: +/// %Tmp1 = copy %Tmp2; +/// %FromReg = copy %Tmp1; +/// %ToReg = add %FromReg ... +/// %Tmp2 = copy %ToReg; +/// MaxLen specifies the maximum length of the copy chain the func +/// can walk through. +bool TwoAddressInstructionPass::isRevCopyChain(unsigned FromReg, unsigned ToReg, + int Maxlen) { + unsigned TmpReg = FromReg; + for (int i = 0; i < Maxlen; i++) { + MachineInstr *Def = getSingleDef(TmpReg, MBB, MRI); + if (!Def || !Def->isCopy()) + return false; + + TmpReg = Def->getOperand(1).getReg(); + + if (TmpReg == ToReg) + return true; + } + return false; +} + +/// Return true if there are no intervening uses between the last instruction +/// in the MBB that defines the specified register and the two-address +/// instruction which is being processed. It also returns the last def location +/// by reference. +bool TwoAddressInstructionPass::noUseAfterLastDef(unsigned Reg, unsigned Dist, + unsigned &LastDef) { + LastDef = 0; + unsigned LastUse = Dist; + for (MachineOperand &MO : MRI->reg_operands(Reg)) { + MachineInstr *MI = MO.getParent(); + if (MI->getParent() != MBB || MI->isDebugValue()) + continue; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + continue; + if (MO.isUse() && DI->second < LastUse) + LastUse = DI->second; + if (MO.isDef() && DI->second > LastDef) + LastDef = DI->second; + } + + return !(LastUse > LastDef && LastUse < Dist); +} + +/// Return true if the specified MI is a copy instruction or an extract_subreg +/// instruction. It also returns the source and destination registers and +/// whether they are physical registers by reference. +static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, + unsigned &SrcReg, unsigned &DstReg, + bool &IsSrcPhys, bool &IsDstPhys) { + SrcReg = 0; + DstReg = 0; + if (MI.isCopy()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(1).getReg(); + } else if (MI.isInsertSubreg() || MI.isSubregToReg()) { + DstReg = MI.getOperand(0).getReg(); + SrcReg = MI.getOperand(2).getReg(); + } else + return false; + + IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return true; +} + +/// Test if the given register value, which is used by the +/// given instruction, is killed by the given instruction. +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, + LiveIntervals *LIS) { + if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && + !LIS->isNotInMIMap(MI)) { + // FIXME: Sometimes tryInstructionTransform() will add instructions and + // test whether they can be folded before keeping them. In this case it + // sets a kill before recursively calling tryInstructionTransform() again. + // If there is no interval available, we assume that this instruction is + // one of those. A kill flag is manually inserted on the operand so the + // check below will handle it. + LiveInterval &LI = LIS->getInterval(Reg); + // This is to match the kill flag version where undefs don't have kill + // flags. 
+ if (!LI.hasAtLeastOneValue()) + return false; + + SlotIndex useIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(useIdx); + assert(I != LI.end() && "Reg must be live-in to use."); + return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx); + } + + return MI->killsRegister(Reg); +} + +/// Test if the given register value, which is used by the given +/// instruction, is killed by the given instruction. This looks through +/// coalescable copies to see if the original value is potentially not killed. +/// +/// For example, in this code: +/// +/// %reg1034 = copy %reg1024 +/// %reg1035 = copy %reg1025<kill> +/// %reg1036 = add %reg1034<kill>, %reg1035<kill> +/// +/// %reg1034 is not considered to be killed, since it is copied from a +/// register which is not killed. Treating it as not killed lets the +/// normal heuristics commute the (two-address) add, which lets +/// coalescing eliminate the extra copy. +/// +/// If allowFalsePositives is true then likely kills are treated as kills even +/// if it can't be proven that they are kills. +static bool isKilled(MachineInstr &MI, unsigned Reg, + const MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + LiveIntervals *LIS, + bool allowFalsePositives) { + MachineInstr *DefMI = &MI; + for (;;) { + // All uses of physical registers are likely to be kills. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && + (allowFalsePositives || MRI->hasOneUse(Reg))) + return true; + if (!isPlainlyKilled(DefMI, Reg, LIS)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return true; + MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg); + // If there are multiple defs, we can't do a simple analysis, so just + // go with what the kill flag says. + if (std::next(Begin) != MRI->def_end()) + return true; + DefMI = Begin->getParent(); + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + // If the def is something other than a copy, then it isn't going to + // be coalesced, so follow the kill flag. + if (!isCopyToReg(*DefMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return true; + Reg = SrcReg; + } +} + +/// Return true if the specified MI uses the specified register as a two-address +/// use. If so, return the destination register by reference. +static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { + for (unsigned i = 0, NumOps = MI.getNumOperands(); i != NumOps; ++i) { + const MachineOperand &MO = MI.getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + unsigned ti; + if (MI.isRegTiedToDefOperand(i, &ti)) { + DstReg = MI.getOperand(ti).getReg(); + return true; + } + } + return false; +} + +/// Given a register, if has a single in-basic block use, return the use +/// instruction if it's a copy or a two-address use. +static +MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, + const TargetInstrInfo *TII, + bool &IsCopy, + unsigned &DstReg, bool &IsDstPhys) { + if (!MRI->hasOneNonDBGUse(Reg)) + // None or more than one use. 
+ return nullptr; + MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(Reg); + if (UseMI.getParent() != MBB) + return nullptr; + unsigned SrcReg; + bool IsSrcPhys; + if (isCopyToReg(UseMI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) { + IsCopy = true; + return &UseMI; + } + IsDstPhys = false; + if (isTwoAddrUse(UseMI, Reg, DstReg)) { + IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); + return &UseMI; + } + return nullptr; +} + +/// Return the physical register the specified virtual register might be mapped +/// to. +static unsigned +getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) { + while (TargetRegisterInfo::isVirtualRegister(Reg)) { + DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg); + if (SI == RegMap.end()) + return 0; + Reg = SI->second; + } + if (TargetRegisterInfo::isPhysicalRegister(Reg)) + return Reg; + return 0; +} + +/// Return true if the two registers are equal or aliased. +static bool +regsAreCompatible(unsigned RegA, unsigned RegB, const TargetRegisterInfo *TRI) { + if (RegA == RegB) + return true; + if (!RegA || !RegB) + return false; + return TRI->regsOverlap(RegA, RegB); +} + + +/// Return true if it's potentially profitable to commute the two-address +/// instruction that's being processed. +bool +TwoAddressInstructionPass:: +isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, + MachineInstr *MI, unsigned Dist) { + if (OptLevel == CodeGenOpt::None) + return false; + + // Determine if it's profitable to commute this two address instruction. In + // general, we want no uses between this instruction and the definition of + // the two-address register. + // e.g. + // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 + // %reg1029<def> = MOV8rr %reg1028 + // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> + // insert => %reg1030<def> = MOV8rr %reg1028 + // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // In this case, it might not be possible to coalesce the second MOV8rr + // instruction if the first one is coalesced. So it would be profitable to + // commute it: + // %reg1028<def> = EXTRACT_SUBREG %reg1027<kill>, 1 + // %reg1029<def> = MOV8rr %reg1028 + // %reg1029<def> = SHR8ri %reg1029, 7, %EFLAGS<imp-def,dead> + // insert => %reg1030<def> = MOV8rr %reg1029 + // %reg1030<def> = ADD8rr %reg1029<kill>, %reg1028<kill>, %EFLAGS<imp-def,dead> + + if (!isPlainlyKilled(MI, regC, LIS)) + return false; + + // Ok, we have something like: + // %reg1030<def> = ADD8rr %reg1028<kill>, %reg1029<kill>, %EFLAGS<imp-def,dead> + // let's see if it's worth commuting it. + + // Look for situations like this: + // %reg1024<def> = MOV r1 + // %reg1025<def> = MOV r0 + // %reg1026<def> = ADD %reg1024, %reg1025 + // r0 = MOV %reg1026 + // Commute the ADD to hopefully eliminate an otherwise unavoidable copy. + unsigned ToRegA = getMappedReg(regA, DstRegMap); + if (ToRegA) { + unsigned FromRegB = getMappedReg(regB, SrcRegMap); + unsigned FromRegC = getMappedReg(regC, SrcRegMap); + bool CompB = FromRegB && regsAreCompatible(FromRegB, ToRegA, TRI); + bool CompC = FromRegC && regsAreCompatible(FromRegC, ToRegA, TRI); + + // Compute if any of the following are true: + // -RegB is not tied to a register and RegC is compatible with RegA. + // -RegB is tied to the wrong physical register, but RegC is. + // -RegB is tied to the wrong physical register, and RegC isn't tied. 
+ if ((!FromRegB && CompC) || (FromRegB && !CompB && (!FromRegC || CompC))) + return true; + // Don't compute if any of the following are true: + // -RegC is not tied to a register and RegB is compatible with RegA. + // -RegC is tied to the wrong physical register, but RegB is. + // -RegC is tied to the wrong physical register, and RegB isn't tied. + if ((!FromRegC && CompB) || (FromRegC && !CompC && (!FromRegB || CompB))) + return false; + } + + // If there is a use of regC between its last def (could be livein) and this + // instruction, then bail. + unsigned LastDefC = 0; + if (!noUseAfterLastDef(regC, Dist, LastDefC)) + return false; + + // If there is a use of regB between its last def (could be livein) and this + // instruction, then go ahead and make this transformation. + unsigned LastDefB = 0; + if (!noUseAfterLastDef(regB, Dist, LastDefB)) + return true; + + // Look for situation like this: + // %reg101 = MOV %reg100 + // %reg102 = ... + // %reg103 = ADD %reg102, %reg101 + // ... = %reg103 ... + // %reg100 = MOV %reg103 + // If there is a reversed copy chain from reg101 to reg103, commute the ADD + // to eliminate an otherwise unavoidable copy. + // FIXME: + // We can extend the logic further: If an pair of operands in an insn has + // been merged, the insn could be regarded as a virtual copy, and the virtual + // copy could also be used to construct a copy chain. + // To more generally minimize register copies, ideally the logic of two addr + // instruction pass should be integrated with register allocation pass where + // interference graph is available. + if (isRevCopyChain(regC, regA, 3)) + return true; + + if (isRevCopyChain(regB, regA, 3)) + return false; + + // Since there are no intervening uses for both registers, then commute + // if the def of regC is closer. Its live interval is shorter. + return LastDefB && LastDefC && LastDefC > LastDefB; +} + +/// Commute a two-address instruction and update the basic block, distance map, +/// and live variables if needed. Return true if it is successful. +bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI, + unsigned RegBIdx, + unsigned RegCIdx, + unsigned Dist) { + unsigned RegC = MI->getOperand(RegCIdx).getReg(); + DEBUG(dbgs() << "2addr: COMMUTING : " << *MI); + MachineInstr *NewMI = TII->commuteInstruction(MI, false, RegBIdx, RegCIdx); + + if (NewMI == nullptr) { + DEBUG(dbgs() << "2addr: COMMUTING FAILED!\n"); + return false; + } + + DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); + assert(NewMI == MI && + "TargetInstrInfo::commuteInstruction() should not return a new " + "instruction unless it was requested."); + + // Update source register map. + unsigned FromRegC = getMappedReg(RegC, SrcRegMap); + if (FromRegC) { + unsigned RegA = MI->getOperand(0).getReg(); + SrcRegMap[RegA] = FromRegC; + } + + return true; +} + +/// Return true if it is profitable to convert the given 2-address instruction +/// to a 3-address one. +bool +TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ + // Look for situations like this: + // %reg1024<def> = MOV r1 + // %reg1025<def> = MOV r0 + // %reg1026<def> = ADD %reg1024, %reg1025 + // r2 = MOV %reg1026 + // Turn ADD into a 3-address instruction to avoid a copy. + unsigned FromRegB = getMappedReg(RegB, SrcRegMap); + if (!FromRegB) + return false; + unsigned ToRegA = getMappedReg(RegA, DstRegMap); + return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); +} + +/// Convert the specified two-address instruction into a three address one. 
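+/// For example, on x86 a two-address add
+///   %reg1030 = ADD32rr %reg1030, %reg1029
+/// whose destination is tied to the first source may be converted into an
+/// LEA, whose destination is not tied to either source, via the target's
+/// convertToThreeAddress() hook.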
+/// Return true if this transformation was successful. +bool +TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned RegA, unsigned RegB, + unsigned Dist) { + // FIXME: Why does convertToThreeAddress() need an iterator reference? + MachineFunction::iterator MFI = MBB->getIterator(); + MachineInstr *NewMI = TII->convertToThreeAddress(MFI, mi, LV); + assert(MBB->getIterator() == MFI && + "convertToThreeAddress changed iterator reference"); + if (!NewMI) + return false; + + DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi); + DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); + bool Sunk = false; + + if (LIS) + LIS->ReplaceMachineInstrInMaps(mi, NewMI); + + if (NewMI->findRegisterUseOperand(RegB, false, TRI)) + // FIXME: Temporary workaround. If the new instruction doesn't + // uses RegB, convertToThreeAddress must have created more + // then one instruction. + Sunk = sink3AddrInstruction(NewMI, RegB, mi); + + MBB->erase(mi); // Nuke the old inst. + + if (!Sunk) { + DistanceMap.insert(std::make_pair(NewMI, Dist)); + mi = NewMI; + nmi = std::next(mi); + } + + // Update source and destination register maps. + SrcRegMap.erase(RegA); + DstRegMap.erase(RegB); + return true; +} + +/// Scan forward recursively for only uses, update maps if the use is a copy or +/// a two-address instruction. +void +TwoAddressInstructionPass::scanUses(unsigned DstReg) { + SmallVector<unsigned, 4> VirtRegPairs; + bool IsDstPhys; + bool IsCopy = false; + unsigned NewReg = 0; + unsigned Reg = DstReg; + while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, + NewReg, IsDstPhys)) { + if (IsCopy && !Processed.insert(UseMI).second) + break; + + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + // Earlier in the same MBB.Reached via a back edge. + break; + + if (IsDstPhys) { + VirtRegPairs.push_back(NewReg); + break; + } + bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; + if (!isNew) + assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + VirtRegPairs.push_back(NewReg); + Reg = NewReg; + } + + if (!VirtRegPairs.empty()) { + unsigned ToReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + while (!VirtRegPairs.empty()) { + unsigned FromReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); + ToReg = FromReg; + } + bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!"); + } +} + +/// If the specified instruction is not yet processed, process it if it's a +/// copy. For a copy instruction, we find the physical registers the +/// source and destination registers might be mapped to. These are kept in +/// point-to maps used to determine future optimizations. e.g. +/// v1024 = mov r0 +/// v1025 = mov r1 +/// v1026 = add v1024, v1025 +/// r1 = mov r1026 +/// If 'add' is a two-address instruction, v1024, v1026 are both potentially +/// coalesced to r0 (from the input side). v1025 is mapped to r1. v1026 is +/// potentially joined with r1 on the output side. It's worthwhile to commute +/// 'add' to eliminate a copy. 
+void TwoAddressInstructionPass::processCopy(MachineInstr *MI) { + if (Processed.count(MI)) + return; + + bool IsSrcPhys, IsDstPhys; + unsigned SrcReg, DstReg; + if (!isCopyToReg(*MI, TII, SrcReg, DstReg, IsSrcPhys, IsDstPhys)) + return; + + if (IsDstPhys && !IsSrcPhys) + DstRegMap.insert(std::make_pair(SrcReg, DstReg)); + else if (!IsDstPhys && IsSrcPhys) { + bool isNew = SrcRegMap.insert(std::make_pair(DstReg, SrcReg)).second; + if (!isNew) + assert(SrcRegMap[DstReg] == SrcReg && + "Can't map to two src physical registers!"); + + scanUses(DstReg); + } + + Processed.insert(MI); + return; +} + +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg, +/// consider moving the instruction below the kill instruction in order to +/// eliminate the need for the copy. +bool TwoAddressInstructionPass:: +rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. + if (!LV && !LIS) + return false; + + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = nullptr; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + if (KillMI->hasUnmodeledSideEffects() || KillMI->isCall() || + KillMI->isBranch() || KillMI->isTerminator()) + // Don't move pass calls, etc. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!MI->isSafeToMove(AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else { + Uses.insert(MOReg); + if (MOReg != Reg && (MO.isKill() || + (LIS && isPlainlyKilled(MI, MOReg, LIS)))) + Kills.insert(MOReg); + } + } + + // Move the copies connected to MI down as well. + MachineBasicBlock::iterator Begin = MI; + MachineBasicBlock::iterator AfterMI = std::next(Begin); + + MachineBasicBlock::iterator End = AfterMI; + while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { + Defs.insert(End->getOperand(0).getReg()); + ++End; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. 
+ return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + for (const MachineOperand &MO : OtherMI->operands()) { + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + bool isKill = MO.isKill() || + (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)); + if (MOReg != Reg && + ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) + // Don't want to extend other live ranges and update kills. + return false; + if (MOReg == Reg && !isKill) + // We can't schedule across a use of the register in question. + return false; + // Ensure that if this is register in question, its the kill we expect. + assert((MOReg != Reg || OtherMI == KillMI) && + "Found multiple kills of a register in a basic block"); + } + } + } + + // Move debug info as well. + while (Begin != MBB->begin() && std::prev(Begin)->isDebugValue()) + --Begin; + + nmi = End; + MachineBasicBlock::iterator InsertPos = KillPos; + if (LIS) { + // We have to move the copies first so that the MBB is still well-formed + // when calling handleMove(). + for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { + MachineInstr *CopyMI = MBBI; + ++MBBI; + MBB->splice(InsertPos, MBB, CopyMI); + LIS->handleMove(CopyMI); + InsertPos = CopyMI; + } + End = std::next(MachineBasicBlock::iterator(MI)); + } + + // Copies following MI may have been moved as well. + MBB->splice(InsertPos, MBB, Begin, End); + DistanceMap.erase(DI); + + // Update live variables + if (LIS) { + LIS->handleMove(MI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } + + DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); + return true; +} + +/// Return true if the re-scheduling will put the given instruction too close +/// to the defs of its register dependencies. +bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI) { + for (MachineInstr &DefMI : MRI->def_instructions(Reg)) { + if (DefMI.getParent() != MBB || DefMI.isCopy() || DefMI.isCopyLike()) + continue; + if (&DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap<MachineInstr*, unsigned>::iterator DDI = DistanceMap.find(&DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, &DefMI) > (Dist - DefDist)) + return true; + } + return false; +} + +/// If there is one more local instruction that reads 'Reg' and it kills 'Reg, +/// consider moving the kill instruction above the current two-address +/// instruction in order to eliminate the need for the copy. +bool TwoAddressInstructionPass:: +rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. 
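+  // LiveIntervals is preferred when available; otherwise LiveVariables' kill
+  // information for Reg is consulted.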
+ if (!LV && !LIS) + return false; + + MachineInstr *MI = &*mi; + DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = nullptr; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } + if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore = true; + if (!KillMI->isSafeToMove(AA, SeenStore)) + return false; + + SmallSet<unsigned, 2> Uses; + SmallSet<unsigned, 2> Kills; + SmallSet<unsigned, 2> Defs; + SmallSet<unsigned, 2> LiveDefs; + for (const MachineOperand &MO : KillMI->operands()) { + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI)) + return false; + bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); + if (MOReg == Reg && !isKill) + return false; + Uses.insert(MOReg); + if (isKill && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + if (OtherMI->hasUnmodeledSideEffects() || OtherMI->isCall() || + OtherMI->isBranch() || OtherMI->isTerminator()) + // Don't move pass calls, etc. + return false; + SmallVector<unsigned, 2> OtherDefs; + for (const MachineOperand &MO : OtherMI->operands()) { + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. + return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. + return false; + if (OtherMI != MI && MOReg == Reg && + !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)))) + // We can't schedule across a use of the register in question. + return false; + } else { + OtherDefs.push_back(MOReg); + } + } + + for (unsigned i = 0, e = OtherDefs.size(); i != e; ++i) { + unsigned MOReg = OtherDefs[i]; + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + + // Move the old kill above MI, don't forget to move debug info as well. 
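+  // KillMI is spliced to a point just above MI and above any DBG_VALUEs that
+  // immediately precede MI; DBG_VALUEs immediately preceding KillMI move with
+  // it.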
+ MachineBasicBlock::iterator InsertPos = mi; + while (InsertPos != MBB->begin() && std::prev(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = std::next(From); + while (std::prev(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = std::prev(InsertPos); // Backtrack so we process the moved instr. + DistanceMap.erase(DI); + + // Update live variables + if (LIS) { + LIS->handleMove(KillMI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } + + DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); + return true; +} + +/// Tries to commute the operand 'BaseOpIdx' and some other operand in the +/// given machine instruction to improve opportunities for coalescing and +/// elimination of a register to register copy. +/// +/// 'DstOpIdx' specifies the index of MI def operand. +/// 'BaseOpKilled' specifies if the register associated with 'BaseOpIdx' +/// operand is killed by the given instruction. +/// The 'Dist' arguments provides the distance of MI from the start of the +/// current basic block and it is used to determine if it is profitable +/// to commute operands in the instruction. +/// +/// Returns true if the transformation happened. Otherwise, returns false. +bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI, + unsigned DstOpIdx, + unsigned BaseOpIdx, + bool BaseOpKilled, + unsigned Dist) { + unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg(); + unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg(); + unsigned OpsNum = MI->getDesc().getNumOperands(); + unsigned OtherOpIdx = MI->getDesc().getNumDefs(); + for (; OtherOpIdx < OpsNum; OtherOpIdx++) { + // The call of findCommutedOpIndices below only checks if BaseOpIdx + // and OtherOpIdx are commutable, it does not really search for + // other commutable operands and does not change the values of passed + // variables. + if (OtherOpIdx == BaseOpIdx || + !TII->findCommutedOpIndices(MI, BaseOpIdx, OtherOpIdx)) + continue; + + unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg(); + bool AggressiveCommute = false; + + // If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp + // operands. This makes the live ranges of DstOp and OtherOp joinable. + bool DoCommute = + !BaseOpKilled && isKilled(*MI, OtherOpReg, MRI, TII, LIS, false); + + if (!DoCommute && + isProfitableToCommute(DstOpReg, BaseOpReg, OtherOpReg, MI, Dist)) { + DoCommute = true; + AggressiveCommute = true; + } + + // If it's profitable to commute, try to do so. + if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) { + ++NumCommuted; + if (AggressiveCommute) + ++NumAggrCommuted; + return true; + } + } + return false; +} + +/// For the case where an instruction has a single pair of tied register +/// operands, attempt some transformations that may either eliminate the tied +/// operands or improve the opportunities for coalescing away the register copy. +/// Returns true if no copy needs to be inserted to untie mi's operands +/// (either because they were untied, or because mi was rescheduled, and will +/// be visited again later). If the shouldOnlyCommute flag is true, only +/// instruction commutation is attempted. 
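+/// The transformations tried are, in order: commuting the instruction,
+/// rescheduling it below the kill of the tied source register, converting it
+/// to a three-address instruction, rescheduling that kill above it, and
+/// unfolding a folded load.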
+bool TwoAddressInstructionPass:: +tryInstructionTransform(MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute) { + if (OptLevel == CodeGenOpt::None) + return false; + + MachineInstr &MI = *mi; + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(regB) && + "cannot make instruction into two-address form"); + bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + + if (TargetRegisterInfo::isVirtualRegister(regA)) + scanUses(regA); + + bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist); + + // If the instruction is convertible to 3 Addr, instead + // of returning try 3 Addr transformation aggresively and + // use this variable to check later. Because it might be better. + // For example, we can just use `leal (%rsi,%rdi), %eax` and `ret` + // instead of the following code. + // addl %esi, %edi + // movl %edi, %eax + // ret + if (Commuted && !MI.isConvertibleTo3Addr()) + return false; + + if (shouldOnlyCommute) + return false; + + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. + if (!Commuted && EnableRescheduling && rescheduleMIBelowKill(mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + + // If we commuted, regB may have changed so we should re-sample it to avoid + // confusing the three address conversion below. + if (Commuted) { + regB = MI.getOperand(SrcIdx).getReg(); + regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); + } + + if (MI.isConvertibleTo3Addr()) { + // This instruction is potentially convertible to a true + // three-address instruction. Check if it is profitable. + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { + // Try to convert it. + if (convertInstTo3Addr(mi, nmi, regA, regB, Dist)) { + ++NumConvertedTo3Addr; + return true; // Done with this instruction. + } + } + } + + // Return if it is commuted but 3 addr conversion is failed. + if (Commuted) + return false; + + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (EnableRescheduling && rescheduleKillAboveMI(mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (MI.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const MCInstrDesc &UnfoldMCID = TII->get(NewOpc); + if (UnfoldMCID.getNumDefs() == 1) { + // Unfold the load. 
+ DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); + const TargetRegisterClass *RC = + TRI->getAllocatableClass( + TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF)); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector<MachineInstr *, 2> NewMIs; + if (!TII->unfoldMemoryOperand(*MF, &MI, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + MBB->insert(mi, NewMIs[0]); + MBB->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. + unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformResult = + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); + (void)TransformResult; + assert(!TransformResult && + "tryInstructionTransform() should return false."); + if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (MO.isReg() && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + + SmallVector<unsigned, 4> OrigRegs; + if (LIS) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isReg()) + OrigRegs.push_back(MO.getReg()); + } + } + + MI.eraseFromParent(); + + // Update LiveIntervals. + if (LIS) { + MachineBasicBlock::iterator Begin(NewMIs[0]); + MachineBasicBlock::iterator End(NewMIs[1]); + LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + } + + mi = NewMIs[1]; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + + return false; +} + +// Collect tied operands of MI that need to be handled. +// Rewrite trivial cases immediately. +// Return true if any tied operands where found, including the trivial ones. 
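+// A tied pair is a (use operand index, def operand index) pair whose operands
+// the target requires to be assigned the same register, e.g. the destination
+// and first source of an x86 ADD32rr. Pairs are grouped by source register in
+// the TiedOperandMap.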
+bool TwoAddressInstructionPass:: +collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) { + const MCInstrDesc &MCID = MI->getDesc(); + bool AnyOps = false; + unsigned NumOps = MI->getNumOperands(); + + for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) { + unsigned DstIdx = 0; + if (!MI->isRegTiedToDefOperand(SrcIdx, &DstIdx)) + continue; + AnyOps = true; + MachineOperand &SrcMO = MI->getOperand(SrcIdx); + MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned SrcReg = SrcMO.getReg(); + unsigned DstReg = DstMO.getReg(); + // Tied constraint already satisfied? + if (SrcReg == DstReg) + continue; + + assert(SrcReg && SrcMO.isUse() && "two address instruction invalid"); + + // Deal with <undef> uses immediately - simply rewrite the src operand. + if (SrcMO.isUndef() && !DstMO.getSubReg()) { + // Constrain the DstReg register class if required. + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx, + TRI, *MF)) + MRI->constrainRegClass(DstReg, RC); + SrcMO.setReg(DstReg); + SrcMO.setSubReg(0); + DEBUG(dbgs() << "\t\trewrite undef:\t" << *MI); + continue; + } + TiedOperands[SrcReg].push_back(std::make_pair(SrcIdx, DstIdx)); + } + return AnyOps; +} + +// Process a list of tied MI operands that all use the same source register. +// The tied pairs are of the form (SrcIdx, DstIdx). +void +TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, + TiedPairList &TiedPairs, + unsigned &Dist) { + bool IsEarlyClobber = false; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); + IsEarlyClobber |= DstMO.isEarlyClobber(); + } + + bool RemovedKillFlag = false; + bool AllUsesCopied = true; + unsigned LastCopiedReg = 0; + SlotIndex LastCopyIdx; + unsigned RegB = 0; + unsigned SubRegB = 0; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + unsigned SrcIdx = TiedPairs[tpi].first; + unsigned DstIdx = TiedPairs[tpi].second; + + const MachineOperand &DstMO = MI->getOperand(DstIdx); + unsigned RegA = DstMO.getReg(); + + // Grab RegB from the instruction because it may have changed if the + // instruction was commuted. + RegB = MI->getOperand(SrcIdx).getReg(); + SubRegB = MI->getOperand(SrcIdx).getSubReg(); + + if (RegA == RegB) { + // The register is tied to multiple destinations (or else we would + // not have continued this far), but this use of the register + // already matches the tied destination. Leave it. + AllUsesCopied = false; + continue; + } + LastCopiedReg = RegA; + + assert(TargetRegisterInfo::isVirtualRegister(RegB) && + "cannot make instruction into two-address form"); + +#ifndef NDEBUG + // First, verify that we don't have a use of "a" in the instruction + // (a = b + a for example) because our transformation will not + // work. This should never occur because we are in SSA form. + for (unsigned i = 0; i != MI->getNumOperands(); ++i) + assert(i == DstIdx || + !MI->getOperand(i).isReg() || + MI->getOperand(i).getReg() != RegA); +#endif + + // Emit a copy. + MachineInstrBuilder MIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), RegA); + // If this operand is folding a truncation, the truncation now moves to the + // copy so that the register classes remain valid for the operands. 
+ MIB.addReg(RegB, 0, SubRegB); + const TargetRegisterClass *RC = MRI->getRegClass(RegB); + if (SubRegB) { + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA), + SubRegB) && + "tied subregister must be a truncation"); + // The superreg class will not be used to constrain the subreg class. + RC = nullptr; + } + else { + assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB)) + && "tied subregister must be a truncation"); + } + } + + // Update DistanceMap. + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + DistanceMap.insert(std::make_pair(PrevMI, Dist)); + DistanceMap[MI] = ++Dist; + + if (LIS) { + LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + LiveInterval &LI = LIS->getInterval(RegA); + VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + SlotIndex endIdx = + LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI)); + } + } + + DEBUG(dbgs() << "\t\tprepend:\t" << *MIB); + + MachineOperand &MO = MI->getOperand(SrcIdx); + assert(MO.isReg() && MO.getReg() == RegB && MO.isUse() && + "inconsistent operand info for 2-reg pass"); + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + + // Make sure regA is a legal regclass for the SrcIdx operand. + if (TargetRegisterInfo::isVirtualRegister(RegA) && + TargetRegisterInfo::isVirtualRegister(RegB)) + MRI->constrainRegClass(RegA, RC); + MO.setReg(RegA); + // The getMatchingSuper asserts guarantee that the register class projected + // by SubRegB is compatible with RegA with no subregister. So regardless of + // whether the dest oper writes a subreg, the source oper should not. + MO.setSubReg(0); + + // Propagate SrcRegMap. + SrcRegMap[RegA] = RegB; + } + + if (AllUsesCopied) { + if (!IsEarlyClobber) { + // Replace other (un-tied) uses of regB with LastCopiedReg. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB && + MO.isUse()) { + if (MO.isKill()) { + MO.setIsKill(false); + RemovedKillFlag = true; + } + MO.setReg(LastCopiedReg); + MO.setSubReg(0); + } + } + } + + // Update live variables for regB. + if (RemovedKillFlag && LV && LV->getVarInfo(RegB).removeKill(MI)) { + MachineBasicBlock::iterator PrevMI = MI; + --PrevMI; + LV->addVirtualRegisterKilled(RegB, PrevMI); + } + + // Update LiveIntervals. + if (LIS) { + LiveInterval &LI = LIS->getInterval(RegB); + SlotIndex MIIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(MIIdx); + assert(I != LI.end() && "RegB must be live-in to use."); + + SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); + if (I->end == UseIdx) + LI.removeSegment(LastCopyIdx, UseIdx); + } + + } else if (RemovedKillFlag) { + // Some tied uses of regB matched their destination registers, so + // regB is still used in this instruction, but a kill flag was + // removed from a different tied use of regB, so now we need to add + // a kill flag to one of the remaining uses of regB. + for (MachineOperand &MO : MI->operands()) { + if (MO.isReg() && MO.getReg() == RegB && MO.isUse()) { + MO.setIsKill(true); + break; + } + } + } +} + +/// Reduce two-address instructions to two operands. 
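+/// For each basic block, copies are scanned to populate SrcRegMap/DstRegMap,
+/// tied operands are collected, the single-pair transformations above are
+/// tried, and any remaining tied pairs are made explicit by inserting COPYs.
+/// REG_SEQUENCE and INSERT_SUBREG instructions are lowered to COPYs as well.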
+bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; + const TargetMachine &TM = MF->getTarget(); + MRI = &MF->getRegInfo(); + TII = MF->getSubtarget().getInstrInfo(); + TRI = MF->getSubtarget().getRegisterInfo(); + InstrItins = MF->getSubtarget().getInstrItineraryData(); + LV = getAnalysisIfAvailable<LiveVariables>(); + LIS = getAnalysisIfAvailable<LiveIntervals>(); + AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); + OptLevel = TM.getOptLevel(); + + bool MadeChange = false; + + DEBUG(dbgs() << "********** REWRITING TWO-ADDR INSTRS **********\n"); + DEBUG(dbgs() << "********** Function: " + << MF->getName() << '\n'); + + // This pass takes the function out of SSA form. + MRI->leaveSSA(); + + TiedOperandMap TiedOperands; + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + MBB = &*MBBI; + unsigned Dist = 0; + DistanceMap.clear(); + SrcRegMap.clear(); + DstRegMap.clear(); + Processed.clear(); + for (MachineBasicBlock::iterator mi = MBB->begin(), me = MBB->end(); + mi != me; ) { + MachineBasicBlock::iterator nmi = std::next(mi); + if (mi->isDebugValue()) { + mi = nmi; + continue; + } + + // Expand REG_SEQUENCE instructions. This will position mi at the first + // expanded instruction. + if (mi->isRegSequence()) + eliminateRegSequence(mi); + + DistanceMap.insert(std::make_pair(mi, ++Dist)); + + processCopy(&*mi); + + // First scan through all the tied register uses in this instruction + // and record a list of pairs of tied operands for each register. + if (!collectTiedOperands(mi, TiedOperands)) { + mi = nmi; + continue; + } + + ++NumTwoAddressInstrs; + MadeChange = true; + DEBUG(dbgs() << '\t' << *mi); + + // If the instruction has a single pair of tied operands, try some + // transformations that may either eliminate the tied operands or + // improve the opportunities for coalescing away the register copy. + if (TiedOperands.size() == 1) { + SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs + = TiedOperands.begin()->second; + if (TiedPairs.size() == 1) { + unsigned SrcIdx = TiedPairs[0].first; + unsigned DstIdx = TiedPairs[0].second; + unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); + unsigned DstReg = mi->getOperand(DstIdx).getReg(); + if (SrcReg != DstReg && + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { + // The tied operands have been eliminated or shifted further down + // the block to ease elimination. Continue processing with 'nmi'. + TiedOperands.clear(); + mi = nmi; + continue; + } + } + } + + // Now iterate over the information collected above. + for (auto &TO : TiedOperands) { + processTiedPairs(mi, TO.second, Dist); + DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); + } + + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->getOperand(0).setIsUndef(mi->getOperand(1).isUndef()); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + + // Clear TiedOperands here instead of at the top of the loop + // since most instructions do not have tied operands. 
+ TiedOperands.clear(); + mi = nmi; + } + } + + if (LIS) + MF->verify(this, "After two-address instruction pass"); + + return MadeChange; +} + +/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process. +/// +/// The instruction is turned into a sequence of sub-register copies: +/// +/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1 +/// +/// Becomes: +/// +/// %dst:ssub0<def,undef> = COPY %v1 +/// %dst:ssub1<def> = COPY %v2 +/// +void TwoAddressInstructionPass:: +eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(nullptr); + } + + SmallVector<unsigned, 4> OrigRegs; + if (LIS) { + OrigRegs.push_back(MI->getOperand(0).getReg()); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) + OrigRegs.push_back(MI->getOperand(i).getReg()); + } + + bool DefEmitted = false; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + MachineOperand &UseMO = MI->getOperand(i); + unsigned SrcReg = UseMO.getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); + // Nothing needs to be inserted for <undef> operands. + if (UseMO.isUndef()) + continue; + + // Defer any kill flag to the last operand using SrcReg. Otherwise, we + // might insert a COPY that uses SrcReg after is was killed. + bool isKill = UseMO.isKill(); + if (isKill) + for (unsigned j = i + 2; j < e; j += 2) + if (MI->getOperand(j).getReg() == SrcReg) { + MI->getOperand(j).setIsKill(); + UseMO.setIsKill(false); + isKill = false; + break; + } + + // Insert the sub-register copy. + MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, SubIdx) + .addOperand(UseMO); + + // The first def needs an <undef> flag because there is no live register + // before it. + if (!DefEmitted) { + CopyMI->getOperand(0).setIsUndef(true); + // Return an iterator pointing to the first inserted instr. + MBBI = CopyMI; + } + DefEmitted = true; + + // Update LiveVariables' kill info. + if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + + DEBUG(dbgs() << "Inserted: " << *CopyMI); + } + + MachineBasicBlock::iterator EndMBBI = + std::next(MachineBasicBlock::iterator(MI)); + + if (!DefEmitted) { + DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + } else { + DEBUG(dbgs() << "Eliminated: " << *MI); + MI->eraseFromParent(); + } + + // Udpate LiveIntervals. + if (LIS) + LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs); +} diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp new file mode 100644 index 0000000..8c9631e --- /dev/null +++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -0,0 +1,208 @@ +//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass is an extremely simple version of the SimplifyCFG pass. Its sole +// job is to delete LLVM basic blocks that are not reachable from the entry +// node. To do this, it performs a simple depth first traversal of the CFG, +// then deletes any unvisited nodes. +// +// Note that this pass is really a hack. In particular, the instruction +// selectors for various targets should just not generate code for unreachable +// blocks. Until LLVM has a more systematic way of defining instruction +// selectors, however, we cannot really expect them to handle additional +// complexity. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Target/TargetInstrInfo.h" +using namespace llvm; + +namespace { + class UnreachableBlockElim : public FunctionPass { + bool runOnFunction(Function &F) override; + public: + static char ID; // Pass identification, replacement for typeid + UnreachableBlockElim() : FunctionPass(ID) { + initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry()); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addPreserved<DominatorTreeWrapperPass>(); + } + }; +} +char UnreachableBlockElim::ID = 0; +INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim", + "Remove unreachable blocks from the CFG", false, false) + +FunctionPass *llvm::createUnreachableBlockEliminationPass() { + return new UnreachableBlockElim(); +} + +bool UnreachableBlockElim::runOnFunction(Function &F) { + SmallPtrSet<BasicBlock*, 8> Reachable; + + // Mark all reachable blocks. + for (BasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<BasicBlock*> DeadBlocks; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) + if (!Reachable.count(&*I)) { + BasicBlock *BB = &*I; + DeadBlocks.push_back(BB); + while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) { + PN->replaceAllUsesWith(Constant::getNullValue(PN->getType())); + BB->getInstList().pop_front(); + } + for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI) + (*SI)->removePredecessor(BB); + BB->dropAllReferences(); + } + + // Actually remove the blocks now. 
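+  // This is a second pass: all references were already dropped above, so dead
+  // blocks that branch to one another can be erased in any order.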
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) { + DeadBlocks[i]->eraseFromParent(); + } + + return !DeadBlocks.empty(); +} + + +namespace { + class UnreachableMachineBlockElim : public MachineFunctionPass { + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + MachineModuleInfo *MMI; + public: + static char ID; // Pass identification, replacement for typeid + UnreachableMachineBlockElim() : MachineFunctionPass(ID) {} + }; +} +char UnreachableMachineBlockElim::ID = 0; + +INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination", + "Remove unreachable machine basic blocks", false, false) + +char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID; + +void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved<MachineLoopInfo>(); + AU.addPreserved<MachineDominatorTree>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) { + SmallPtrSet<MachineBasicBlock*, 8> Reachable; + bool ModifiedPHI = false; + + MMI = getAnalysisIfAvailable<MachineModuleInfo>(); + MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>(); + MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>(); + + // Mark all reachable blocks. + for (MachineBasicBlock *BB : depth_first_ext(&F, Reachable)) + (void)BB/* Mark all reachable blocks */; + + // Loop over all dead blocks, remembering them and deleting all instructions + // in them. + std::vector<MachineBasicBlock*> DeadBlocks; + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = &*I; + + // Test for deadness. + if (!Reachable.count(BB)) { + DeadBlocks.push_back(BB); + + // Update dominator and loop info. + if (MLI) MLI->removeBlock(BB); + if (MDT && MDT->getNode(BB)) MDT->eraseNode(BB); + + while (BB->succ_begin() != BB->succ_end()) { + MachineBasicBlock* succ = *BB->succ_begin(); + + MachineBasicBlock::iterator start = succ->begin(); + while (start != succ->end() && start->isPHI()) { + for (unsigned i = start->getNumOperands() - 1; i >= 2; i-=2) + if (start->getOperand(i).isMBB() && + start->getOperand(i).getMBB() == BB) { + start->RemoveOperand(i); + start->RemoveOperand(i-1); + } + + start++; + } + + BB->removeSuccessor(BB->succ_begin()); + } + } + } + + // Actually remove the blocks now. + for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) + DeadBlocks[i]->eraseFromParent(); + + // Cleanup PHI nodes. + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + MachineBasicBlock *BB = &*I; + // Prune unneeded PHI entries. 
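+    // An entry is unneeded when its incoming block is no longer a predecessor
+    // of this block. A PHI left with a single entry is deleted and its input
+    // and output registers are merged via replaceRegWith().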
+ SmallPtrSet<MachineBasicBlock*, 8> preds(BB->pred_begin(), + BB->pred_end()); + MachineBasicBlock::iterator phi = BB->begin(); + while (phi != BB->end() && phi->isPHI()) { + for (unsigned i = phi->getNumOperands() - 1; i >= 2; i-=2) + if (!preds.count(phi->getOperand(i).getMBB())) { + phi->RemoveOperand(i); + phi->RemoveOperand(i-1); + ModifiedPHI = true; + } + + if (phi->getNumOperands() == 3) { + unsigned Input = phi->getOperand(1).getReg(); + unsigned Output = phi->getOperand(0).getReg(); + + MachineInstr* temp = phi; + ++phi; + temp->eraseFromParent(); + ModifiedPHI = true; + + if (Input != Output) { + MachineRegisterInfo &MRI = F.getRegInfo(); + MRI.constrainRegClass(Input, MRI.getRegClass(Output)); + MRI.replaceRegWith(Output, Input); + } + + continue; + } + + ++phi; + } + } + + F.RenumberBlocks(); + + return (!DeadBlocks.empty() || ModifiedPHI); +} diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp new file mode 100644 index 0000000..bf1c0dc --- /dev/null +++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp @@ -0,0 +1,448 @@ +//===-- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the VirtRegMap class. +// +// It also contains implementations of the Spiller interface, which, given a +// virtual register map and a machine function, eliminates all virtual +// references by replacing them with physical register references - adding spill +// code as necessary. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/VirtRegMap.h" +#include "LiveDebugVariables.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include <algorithm> +using namespace llvm; + +#define DEBUG_TYPE "regalloc" + +STATISTIC(NumSpillSlots, "Number of spill slots allocated"); +STATISTIC(NumIdCopies, "Number of identity moves eliminated after rewriting"); + +//===----------------------------------------------------------------------===// +// VirtRegMap implementation +//===----------------------------------------------------------------------===// + +char VirtRegMap::ID = 0; + +INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false) + +bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { + MRI = &mf.getRegInfo(); + TII = mf.getSubtarget().getInstrInfo(); + TRI = mf.getSubtarget().getRegisterInfo(); + MF = &mf; + + Virt2PhysMap.clear(); + Virt2StackSlotMap.clear(); + Virt2SplitMap.clear(); + + grow(); + return false; +} + +void VirtRegMap::grow() { + unsigned NumRegs = 
MF->getRegInfo().getNumVirtRegs(); + Virt2PhysMap.resize(NumRegs); + Virt2StackSlotMap.resize(NumRegs); + Virt2SplitMap.resize(NumRegs); +} + +unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { + int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), + RC->getAlignment()); + ++NumSpillSlots; + return SS; +} + +bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) { + unsigned Hint = MRI->getSimpleHint(VirtReg); + if (!Hint) + return 0; + if (TargetRegisterInfo::isVirtualRegister(Hint)) + Hint = getPhys(Hint); + return getPhys(VirtReg) == Hint; +} + +bool VirtRegMap::hasKnownPreference(unsigned VirtReg) { + std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg); + if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) + return true; + if (TargetRegisterInfo::isVirtualRegister(Hint.second)) + return hasPhys(Hint.second); + return false; +} + +int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg); + return Virt2StackSlotMap[virtReg] = createSpillSlot(RC); +} + +void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { + assert(TargetRegisterInfo::isVirtualRegister(virtReg)); + assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT && + "attempt to assign stack slot to already spilled register"); + assert((SS >= 0 || + (SS >= MF->getFrameInfo()->getObjectIndexBegin())) && + "illegal fixed frame index"); + Virt2StackSlotMap[virtReg] = SS; +} + +void VirtRegMap::print(raw_ostream &OS, const Module*) const { + OS << "********** REGISTER MAP **********\n"; + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) { + OS << '[' << PrintReg(Reg, TRI) << " -> " + << PrintReg(Virt2PhysMap[Reg], TRI) << "] " + << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; + } + } + + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) { + OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg] + << "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n"; + } + } + OS << '\n'; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VirtRegMap::dump() const { + print(dbgs()); +} +#endif + +//===----------------------------------------------------------------------===// +// VirtRegRewriter +//===----------------------------------------------------------------------===// +// +// The VirtRegRewriter is the last of the register allocator passes. +// It rewrites virtual registers to physical registers as specified in the +// VirtRegMap analysis. It also updates live-in information on basic blocks +// according to LiveIntervals. 
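+//
+// For illustration only (hypothetical mapping and MIR), with %vreg0 -> %EAX
+// and %vreg1 -> %ECX the rewrite turns
+//
+//   %vreg0<def> = MOV32ri 42
+//   %vreg1<def> = COPY %vreg0
+//
+// into
+//
+//   %EAX<def> = MOV32ri 42
+//   %ECX<def> = COPY %EAX
+//
+// If both virtual registers had been assigned the same physical register,
+// the resulting identity COPY would simply be deleted.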
+// +namespace { +class VirtRegRewriter : public MachineFunctionPass { + MachineFunction *MF; + const TargetMachine *TM; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + MachineRegisterInfo *MRI; + SlotIndexes *Indexes; + LiveIntervals *LIS; + VirtRegMap *VRM; + + void rewrite(); + void addMBBLiveIns(); + bool readsUndefSubreg(const MachineOperand &MO) const; + void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const; + +public: + static char ID; + VirtRegRewriter() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + bool runOnMachineFunction(MachineFunction&) override; +}; +} // end anonymous namespace + +char &llvm::VirtRegRewriterID = VirtRegRewriter::ID; + +INITIALIZE_PASS_BEGIN(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) +INITIALIZE_PASS_DEPENDENCY(SlotIndexes) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) +INITIALIZE_PASS_DEPENDENCY(LiveStacks) +INITIALIZE_PASS_DEPENDENCY(VirtRegMap) +INITIALIZE_PASS_END(VirtRegRewriter, "virtregrewriter", + "Virtual Register Rewriter", false, false) + +char VirtRegRewriter::ID = 0; + +void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired<LiveIntervals>(); + AU.addRequired<SlotIndexes>(); + AU.addPreserved<SlotIndexes>(); + AU.addRequired<LiveDebugVariables>(); + AU.addRequired<LiveStacks>(); + AU.addPreserved<LiveStacks>(); + AU.addRequired<VirtRegMap>(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) { + MF = &fn; + TM = &MF->getTarget(); + TRI = MF->getSubtarget().getRegisterInfo(); + TII = MF->getSubtarget().getInstrInfo(); + MRI = &MF->getRegInfo(); + Indexes = &getAnalysis<SlotIndexes>(); + LIS = &getAnalysis<LiveIntervals>(); + VRM = &getAnalysis<VirtRegMap>(); + DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" + << "********** Function: " + << MF->getName() << '\n'); + DEBUG(VRM->dump()); + + // Add kill flags while we still have virtual registers. + LIS->addKillFlags(VRM); + + // Live-in lists on basic blocks are required for physregs. + addMBBLiveIns(); + + // Rewrite virtual registers. + rewrite(); + + // Write out new DBG_VALUE instructions. + getAnalysis<LiveDebugVariables>().emitDebugValues(VRM); + + // All machine operands and other references to virtual registers have been + // replaced. Remove the virtual registers and release all the transient data. + VRM->clearAllVirt(); + MRI->clearVirtRegs(); + return true; +} + +void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI, + unsigned PhysReg) const { + assert(!LI.empty()); + assert(LI.hasSubRanges()); + + typedef std::pair<const LiveInterval::SubRange *, + LiveInterval::const_iterator> SubRangeIteratorPair; + SmallVector<SubRangeIteratorPair, 4> SubRanges; + SlotIndex First; + SlotIndex Last; + for (const LiveInterval::SubRange &SR : LI.subranges()) { + SubRanges.push_back(std::make_pair(&SR, SR.begin())); + if (!First.isValid() || SR.segments.front().start < First) + First = SR.segments.front().start; + if (!Last.isValid() || SR.segments.back().end > Last) + Last = SR.segments.back().end; + } + + // Check all mbb start positions between First and Last while + // simulatenously advancing an iterator for each subrange. 
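+  //
+  // For example (hypothetical lane masks): if the sub_lo subrange (mask 0x1)
+  // is live across a block entry but the sub_hi subrange (mask 0x2) is not,
+  // that block is added as a live-in for PhysReg with LaneMask 0x1 only.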
+ for (SlotIndexes::MBBIndexIterator MBBI = Indexes->findMBBIndex(First); + MBBI != Indexes->MBBIndexEnd() && MBBI->first <= Last; ++MBBI) { + SlotIndex MBBBegin = MBBI->first; + // Advance all subrange iterators so that their end position is just + // behind MBBBegin (or the iterator is at the end). + LaneBitmask LaneMask = 0; + for (auto &RangeIterPair : SubRanges) { + const LiveInterval::SubRange *SR = RangeIterPair.first; + LiveInterval::const_iterator &SRI = RangeIterPair.second; + while (SRI != SR->end() && SRI->end <= MBBBegin) + ++SRI; + if (SRI == SR->end()) + continue; + if (SRI->start <= MBBBegin) + LaneMask |= SR->LaneMask; + } + if (LaneMask == 0) + continue; + MachineBasicBlock *MBB = MBBI->second; + MBB->addLiveIn(PhysReg, LaneMask); + } +} + +// Compute MBB live-in lists from virtual register live ranges and their +// assignments. +void VirtRegRewriter::addMBBLiveIns() { + for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx); + if (MRI->reg_nodbg_empty(VirtReg)) + continue; + LiveInterval &LI = LIS->getInterval(VirtReg); + if (LI.empty() || LIS->intervalIsInOneMBB(LI)) + continue; + // This is a virtual register that is live across basic blocks. Its + // assigned PhysReg must be marked as live-in to those blocks. + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register."); + + if (LI.hasSubRanges()) { + addLiveInsForSubRanges(LI, PhysReg); + } else { + // Go over MBB begin positions and see if we have segments covering them. + // The following works because segments and the MBBIndex list are both + // sorted by slot indexes. + SlotIndexes::MBBIndexIterator I = Indexes->MBBIndexBegin(); + for (const auto &Seg : LI) { + I = Indexes->advanceMBBIndex(I, Seg.start); + for (; I != Indexes->MBBIndexEnd() && I->first < Seg.end; ++I) { + MachineBasicBlock *MBB = I->second; + MBB->addLiveIn(PhysReg); + } + } + } + } + + // Sort and unique MBB LiveIns as we've not checked if SubReg/PhysReg were in + // each MBB's LiveIns set before calling addLiveIn on them. + for (MachineBasicBlock &MBB : *MF) + MBB.sortUniqueLiveIns(); +} + +/// Returns true if the given machine operand \p MO only reads undefined lanes. +/// The function only works for use operands with a subregister set. +bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const { + // Shortcut if the operand is already marked undef. + if (MO.isUndef()) + return true; + + unsigned Reg = MO.getReg(); + const LiveInterval &LI = LIS->getInterval(Reg); + const MachineInstr &MI = *MO.getParent(); + SlotIndex BaseIndex = LIS->getInstructionIndex(&MI); + // This code is only meant to handle reading undefined subregisters which + // we couldn't properly detect before. + assert(LI.liveAt(BaseIndex) && + "Reads of completely dead register should be marked undef already"); + unsigned SubRegIdx = MO.getSubReg(); + LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx); + // See if any of the relevant subregister liveranges is defined at this point. 
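+  //
+  // For example (hypothetical masks): a use of sub_hi has UseMask 0x2, and
+  // the read touches defined lanes only if some subrange whose LaneMask
+  // overlaps 0x2 is live at BaseIndex. Otherwise the caller marks the
+  // operand <undef>.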
+ for (const LiveInterval::SubRange &SR : LI.subranges()) { + if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex)) + return false; + } + return true; +} + +void VirtRegRewriter::rewrite() { + bool NoSubRegLiveness = !MRI->subRegLivenessEnabled(); + SmallVector<unsigned, 8> SuperDeads; + SmallVector<unsigned, 8> SuperDefs; + SmallVector<unsigned, 8> SuperKills; + + for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); + MBBI != MBBE; ++MBBI) { + DEBUG(MBBI->print(dbgs(), Indexes)); + for (MachineBasicBlock::instr_iterator + MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) { + MachineInstr *MI = &*MII; + ++MII; + + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MachineOperand &MO = *MOI; + + // Make sure MRI knows about registers clobbered by regmasks. + if (MO.isRegMask()) + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + + if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) + continue; + unsigned VirtReg = MO.getReg(); + unsigned PhysReg = VRM->getPhys(VirtReg); + assert(PhysReg != VirtRegMap::NO_PHYS_REG && + "Instruction uses unmapped VirtReg"); + assert(!MRI->isReserved(PhysReg) && "Reserved register assignment"); + + // Preserve semantics of sub-register operands. + unsigned SubReg = MO.getSubReg(); + if (SubReg != 0) { + if (NoSubRegLiveness) { + // A virtual register kill refers to the whole register, so we may + // have to add <imp-use,kill> operands for the super-register. A + // partial redef always kills and redefines the super-register. + if (MO.readsReg() && (MO.isDef() || MO.isKill())) + SuperKills.push_back(PhysReg); + + if (MO.isDef()) { + // Also add implicit defs for the super-register. + if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); + } + } else { + if (MO.isUse()) { + if (readsUndefSubreg(MO)) + // We need to add an <undef> flag if the subregister is + // completely undefined (and we are not adding super-register + // defs). + MO.setIsUndef(true); + } else if (!MO.isDead()) { + assert(MO.isDef()); + } + } + + // The <def,undef> flag only makes sense for sub-register defs, and + // we are substituting a full physreg. An <imp-use,kill> operand + // from the SuperKills list will represent the partial read of the + // super-register. + if (MO.isDef()) + MO.setIsUndef(false); + + // PhysReg operands cannot have subregister indexes. + PhysReg = TRI->getSubReg(PhysReg, SubReg); + assert(PhysReg && "Invalid SubReg for physical register"); + MO.setSubReg(0); + } + // Rewrite. Note we could have used MachineOperand::substPhysReg(), but + // we need the inlining here. + MO.setReg(PhysReg); + } + + // Add any missing super-register kills after rewriting the whole + // instruction. + while (!SuperKills.empty()) + MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + + DEBUG(dbgs() << "> " << *MI); + + // Finally, remove any identity copies. + if (MI->isIdentityCopy()) { + ++NumIdCopies; + DEBUG(dbgs() << "Deleting identity copy.\n"); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. 
+ MI->eraseFromParent(); + } + } + } +} + diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp new file mode 100644 index 0000000..2426c27 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp @@ -0,0 +1,1239 @@ +//===-- WinEHPrepare - Prepare exception handling for code generation ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass lowers LLVM IR exception handling into something closer to what the +// backend wants for functions using a personality function from a runtime +// provided by MSVC. Functions with other personality functions are left alone +// and may be prepared by other passes. In particular, all supported MSVC +// personality functions require cleanup code to be outlined, and the C++ +// personality requires catch handler code to be outlined. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/WinEHFuncInfo.h" +#include "llvm/IR/Verifier.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" + +using namespace llvm; + +#define DEBUG_TYPE "winehprepare" + +static cl::opt<bool> DisableDemotion( + "disable-demotion", cl::Hidden, + cl::desc( + "Clone multicolor basic blocks but do not demote cross funclet values"), + cl::init(false)); + +static cl::opt<bool> DisableCleanups( + "disable-cleanups", cl::Hidden, + cl::desc("Do not remove implausible terminators or other similar cleanups"), + cl::init(false)); + +namespace { + +class WinEHPrepare : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid. + WinEHPrepare(const TargetMachine *TM = nullptr) : FunctionPass(ID) {} + + bool runOnFunction(Function &Fn) override; + + bool doFinalization(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { + return "Windows exception handling preparation"; + } + +private: + void insertPHIStores(PHINode *OriginalPHI, AllocaInst *SpillSlot); + void + insertPHIStore(BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot, + SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist); + AllocaInst *insertPHILoads(PHINode *PN, Function &F); + void replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot, + DenseMap<BasicBlock *, Value *> &Loads, Function &F); + bool prepareExplicitEH(Function &F); + void colorFunclets(Function &F); + + void demotePHIsOnFunclets(Function &F); + void cloneCommonBlocks(Function &F); + void removeImplausibleInstructions(Function &F); + void cleanupPreparedFunclets(Function &F); + void verifyPreparedFunclets(Function &F); + + // All fields are reset by runOnFunction. 
+ EHPersonality Personality = EHPersonality::Unknown; + + DenseMap<BasicBlock *, ColorVector> BlockColors; + MapVector<BasicBlock *, std::vector<BasicBlock *>> FuncletBlocks; +}; + +} // end anonymous namespace + +char WinEHPrepare::ID = 0; +INITIALIZE_TM_PASS(WinEHPrepare, "winehprepare", "Prepare Windows exceptions", + false, false) + +FunctionPass *llvm::createWinEHPass(const TargetMachine *TM) { + return new WinEHPrepare(TM); +} + +bool WinEHPrepare::runOnFunction(Function &Fn) { + if (!Fn.hasPersonalityFn()) + return false; + + // Classify the personality to see what kind of preparation we need. + Personality = classifyEHPersonality(Fn.getPersonalityFn()); + + // Do nothing if this is not a funclet-based personality. + if (!isFuncletEHPersonality(Personality)) + return false; + + return prepareExplicitEH(Fn); +} + +bool WinEHPrepare::doFinalization(Module &M) { return false; } + +void WinEHPrepare::getAnalysisUsage(AnalysisUsage &AU) const {} + +static int addUnwindMapEntry(WinEHFuncInfo &FuncInfo, int ToState, + const BasicBlock *BB) { + CxxUnwindMapEntry UME; + UME.ToState = ToState; + UME.Cleanup = BB; + FuncInfo.CxxUnwindMap.push_back(UME); + return FuncInfo.getLastStateNumber(); +} + +static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow, + int TryHigh, int CatchHigh, + ArrayRef<const CatchPadInst *> Handlers) { + WinEHTryBlockMapEntry TBME; + TBME.TryLow = TryLow; + TBME.TryHigh = TryHigh; + TBME.CatchHigh = CatchHigh; + assert(TBME.TryLow <= TBME.TryHigh); + for (const CatchPadInst *CPI : Handlers) { + WinEHHandlerType HT; + Constant *TypeInfo = cast<Constant>(CPI->getArgOperand(0)); + if (TypeInfo->isNullValue()) + HT.TypeDescriptor = nullptr; + else + HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts()); + HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue(); + HT.Handler = CPI->getParent(); + if (isa<ConstantPointerNull>(CPI->getArgOperand(2))) + HT.CatchObj.Alloca = nullptr; + else + HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2)); + TBME.HandlerArray.push_back(HT); + } + FuncInfo.TryBlockMap.push_back(TBME); +} + +static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CleanupPad) { + for (const User *U : CleanupPad->users()) + if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) + return CRI->getUnwindDest(); + return nullptr; +} + +static void calculateStateNumbersForInvokes(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + auto *F = const_cast<Function *>(Fn); + DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(*F); + for (BasicBlock &BB : *F) { + auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); + if (!II) + continue; + + auto &BBColors = BlockColors[&BB]; + assert(BBColors.size() == 1 && "multi-color BB not removed by preparation"); + BasicBlock *FuncletEntryBB = BBColors.front(); + + BasicBlock *FuncletUnwindDest; + auto *FuncletPad = + dyn_cast<FuncletPadInst>(FuncletEntryBB->getFirstNonPHI()); + assert(FuncletPad || FuncletEntryBB == &Fn->getEntryBlock()); + if (!FuncletPad) + FuncletUnwindDest = nullptr; + else if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) + FuncletUnwindDest = CatchPad->getCatchSwitch()->getUnwindDest(); + else if (auto *CleanupPad = dyn_cast<CleanupPadInst>(FuncletPad)) + FuncletUnwindDest = getCleanupRetUnwindDest(CleanupPad); + else + llvm_unreachable("unexpected funclet pad!"); + + BasicBlock *InvokeUnwindDest = II->getUnwindDest(); + int BaseState = -1; + if (FuncletUnwindDest == InvokeUnwindDest) { + auto BaseStateI = 
FuncInfo.FuncletBaseStateMap.find(FuncletPad); + if (BaseStateI != FuncInfo.FuncletBaseStateMap.end()) + BaseState = BaseStateI->second; + } + + if (BaseState != -1) { + FuncInfo.InvokeStateMap[II] = BaseState; + } else { + Instruction *PadInst = InvokeUnwindDest->getFirstNonPHI(); + assert(FuncInfo.EHPadStateMap.count(PadInst) && "EH Pad has no state!"); + FuncInfo.InvokeStateMap[II] = FuncInfo.EHPadStateMap[PadInst]; + } + } +} + +// Given BB which ends in an unwind edge, return the EHPad that this BB belongs +// to. If the unwind edge came from an invoke, return null. +static const BasicBlock *getEHPadFromPredecessor(const BasicBlock *BB, + Value *ParentPad) { + const TerminatorInst *TI = BB->getTerminator(); + if (isa<InvokeInst>(TI)) + return nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(TI)) { + if (CatchSwitch->getParentPad() != ParentPad) + return nullptr; + return BB; + } + assert(!TI->isEHPad() && "unexpected EHPad!"); + auto *CleanupPad = cast<CleanupReturnInst>(TI)->getCleanupPad(); + if (CleanupPad->getParentPad() != ParentPad) + return nullptr; + return CleanupPad->getParent(); +} + +static void calculateCXXStateNumbers(WinEHFuncInfo &FuncInfo, + const Instruction *FirstNonPHI, + int ParentState) { + const BasicBlock *BB = FirstNonPHI->getParent(); + assert(BB->isEHPad() && "not a funclet!"); + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) { + assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 && + "shouldn't revist catch funclets!"); + + SmallVector<const CatchPadInst *, 2> Handlers; + for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) { + auto *CatchPad = cast<CatchPadInst>(CatchPadBB->getFirstNonPHI()); + Handlers.push_back(CatchPad); + } + int TryLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr); + FuncInfo.EHPadStateMap[CatchSwitch] = TryLow; + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CatchSwitch->getParentPad()))) + calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + TryLow); + int CatchLow = addUnwindMapEntry(FuncInfo, ParentState, nullptr); + + // catchpads are separate funclets in C++ EH due to the way rethrow works. + int TryHigh = CatchLow - 1; + for (const auto *CatchPad : Handlers) { + FuncInfo.FuncletBaseStateMap[CatchPad] = CatchLow; + for (const User *U : CatchPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) + if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) + if (getCleanupRetUnwindDest(InnerCleanupPad) == + CatchSwitch->getUnwindDest()) + calculateCXXStateNumbers(FuncInfo, UserI, CatchLow); + } + } + int CatchHigh = FuncInfo.getLastStateNumber(); + addTryBlockMapEntry(FuncInfo, TryLow, TryHigh, CatchHigh, Handlers); + DEBUG(dbgs() << "TryLow[" << BB->getName() << "]: " << TryLow << '\n'); + DEBUG(dbgs() << "TryHigh[" << BB->getName() << "]: " << TryHigh << '\n'); + DEBUG(dbgs() << "CatchHigh[" << BB->getName() << "]: " << CatchHigh + << '\n'); + } else { + auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); + + // It's possible for a cleanup to be visited twice: it might have multiple + // cleanupret instructions. 
+ if (FuncInfo.EHPadStateMap.count(CleanupPad)) + return; + + int CleanupState = addUnwindMapEntry(FuncInfo, ParentState, BB); + FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; + DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) { + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CleanupPad->getParentPad()))) { + calculateCXXStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + CleanupState); + } + } + for (const User *U : CleanupPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (UserI->isEHPad()) + report_fatal_error("Cleanup funclets for the MSVC++ personality cannot " + "contain exceptional actions"); + } + } +} + +static int addSEHExcept(WinEHFuncInfo &FuncInfo, int ParentState, + const Function *Filter, const BasicBlock *Handler) { + SEHUnwindMapEntry Entry; + Entry.ToState = ParentState; + Entry.IsFinally = false; + Entry.Filter = Filter; + Entry.Handler = Handler; + FuncInfo.SEHUnwindMap.push_back(Entry); + return FuncInfo.SEHUnwindMap.size() - 1; +} + +static int addSEHFinally(WinEHFuncInfo &FuncInfo, int ParentState, + const BasicBlock *Handler) { + SEHUnwindMapEntry Entry; + Entry.ToState = ParentState; + Entry.IsFinally = true; + Entry.Filter = nullptr; + Entry.Handler = Handler; + FuncInfo.SEHUnwindMap.push_back(Entry); + return FuncInfo.SEHUnwindMap.size() - 1; +} + +static void calculateSEHStateNumbers(WinEHFuncInfo &FuncInfo, + const Instruction *FirstNonPHI, + int ParentState) { + const BasicBlock *BB = FirstNonPHI->getParent(); + assert(BB->isEHPad() && "no a funclet!"); + + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FirstNonPHI)) { + assert(FuncInfo.EHPadStateMap.count(CatchSwitch) == 0 && + "shouldn't revist catch funclets!"); + + // Extract the filter function and the __except basic block and create a + // state for them. + assert(CatchSwitch->getNumHandlers() == 1 && + "SEH doesn't have multiple handlers per __try"); + const auto *CatchPad = + cast<CatchPadInst>((*CatchSwitch->handler_begin())->getFirstNonPHI()); + const BasicBlock *CatchPadBB = CatchPad->getParent(); + const Constant *FilterOrNull = + cast<Constant>(CatchPad->getArgOperand(0)->stripPointerCasts()); + const Function *Filter = dyn_cast<Function>(FilterOrNull); + assert((Filter || FilterOrNull->isNullValue()) && + "unexpected filter value"); + int TryState = addSEHExcept(FuncInfo, ParentState, Filter, CatchPadBB); + + // Everything in the __try block uses TryState as its parent state. + FuncInfo.EHPadStateMap[CatchSwitch] = TryState; + DEBUG(dbgs() << "Assigning state #" << TryState << " to BB " + << CatchPadBB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = getEHPadFromPredecessor(PredBlock, + CatchSwitch->getParentPad()))) + calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + TryState); + + // Everything in the __except block unwinds to ParentState, just like code + // outside the __try. 
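+    //
+    // For illustration only (hypothetical source):
+    //
+    //   __try {             // gets TryState
+    //     f();              // an invoke here is numbered TryState
+    //   } __except (flt()) {
+    //     g();              // unwinds to ParentState, like code after the try
+    //   }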
+ for (const User *U : CatchPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (auto *InnerCatchSwitch = dyn_cast<CatchSwitchInst>(UserI)) + if (InnerCatchSwitch->getUnwindDest() == CatchSwitch->getUnwindDest()) + calculateSEHStateNumbers(FuncInfo, UserI, ParentState); + if (auto *InnerCleanupPad = dyn_cast<CleanupPadInst>(UserI)) + if (getCleanupRetUnwindDest(InnerCleanupPad) == + CatchSwitch->getUnwindDest()) + calculateSEHStateNumbers(FuncInfo, UserI, ParentState); + } + } else { + auto *CleanupPad = cast<CleanupPadInst>(FirstNonPHI); + + // It's possible for a cleanup to be visited twice: it might have multiple + // cleanupret instructions. + if (FuncInfo.EHPadStateMap.count(CleanupPad)) + return; + + int CleanupState = addSEHFinally(FuncInfo, ParentState, BB); + FuncInfo.EHPadStateMap[CleanupPad] = CleanupState; + DEBUG(dbgs() << "Assigning state #" << CleanupState << " to BB " + << BB->getName() << '\n'); + for (const BasicBlock *PredBlock : predecessors(BB)) + if ((PredBlock = + getEHPadFromPredecessor(PredBlock, CleanupPad->getParentPad()))) + calculateSEHStateNumbers(FuncInfo, PredBlock->getFirstNonPHI(), + CleanupState); + for (const User *U : CleanupPad->users()) { + const auto *UserI = cast<Instruction>(U); + if (UserI->isEHPad()) + report_fatal_error("Cleanup funclets for the SEH personality cannot " + "contain exceptional actions"); + } + } +} + +static bool isTopLevelPadForMSVC(const Instruction *EHPad) { + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(EHPad)) + return isa<ConstantTokenNone>(CatchSwitch->getParentPad()) && + CatchSwitch->unwindsToCaller(); + if (auto *CleanupPad = dyn_cast<CleanupPadInst>(EHPad)) + return isa<ConstantTokenNone>(CleanupPad->getParentPad()) && + getCleanupRetUnwindDest(CleanupPad) == nullptr; + if (isa<CatchPadInst>(EHPad)) + return false; + llvm_unreachable("unexpected EHPad!"); +} + +void llvm::calculateSEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Don't compute state numbers twice. + if (!FuncInfo.SEHUnwindMap.empty()) + return; + + for (const BasicBlock &BB : *Fn) { + if (!BB.isEHPad()) + continue; + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + if (!isTopLevelPadForMSVC(FirstNonPHI)) + continue; + ::calculateSEHStateNumbers(FuncInfo, FirstNonPHI, -1); + } + + calculateStateNumbersForInvokes(Fn, FuncInfo); +} + +void llvm::calculateWinCXXEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Return if it's already been done. + if (!FuncInfo.EHPadStateMap.empty()) + return; + + for (const BasicBlock &BB : *Fn) { + if (!BB.isEHPad()) + continue; + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + if (!isTopLevelPadForMSVC(FirstNonPHI)) + continue; + calculateCXXStateNumbers(FuncInfo, FirstNonPHI, -1); + } + + calculateStateNumbersForInvokes(Fn, FuncInfo); +} + +static int addClrEHHandler(WinEHFuncInfo &FuncInfo, int HandlerParentState, + int TryParentState, ClrHandlerType HandlerType, + uint32_t TypeToken, const BasicBlock *Handler) { + ClrEHUnwindMapEntry Entry; + Entry.HandlerParentState = HandlerParentState; + Entry.TryParentState = TryParentState; + Entry.Handler = Handler; + Entry.HandlerType = HandlerType; + Entry.TypeToken = TypeToken; + FuncInfo.ClrEHUnwindMap.push_back(Entry); + return FuncInfo.ClrEHUnwindMap.size() - 1; +} + +void llvm::calculateClrEHStateNumbers(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + // Return if it's already been done. 
+ if (!FuncInfo.EHPadStateMap.empty()) + return; + + // This numbering assigns one state number to each catchpad and cleanuppad. + // It also computes two tree-like relations over states: + // 1) Each state has a "HandlerParentState", which is the state of the next + // outer handler enclosing this state's handler (same as nearest ancestor + // per the ParentPad linkage on EH pads, but skipping over catchswitches). + // 2) Each state has a "TryParentState", which: + // a) for a catchpad that's not the last handler on its catchswitch, is + // the state of the next catchpad on that catchswitch + // b) for all other pads, is the state of the pad whose try region is the + // next outer try region enclosing this state's try region. The "try + // regions are not present as such in the IR, but will be inferred + // based on the placement of invokes and pads which reach each other + // by exceptional exits + // Catchswitches do not get their own states, but each gets mapped to the + // state of its first catchpad. + + // Step one: walk down from outermost to innermost funclets, assigning each + // catchpad and cleanuppad a state number. Add an entry to the + // ClrEHUnwindMap for each state, recording its HandlerParentState and + // handler attributes. Record the TryParentState as well for each catchpad + // that's not the last on its catchswitch, but initialize all other entries' + // TryParentStates to a sentinel -1 value that the next pass will update. + + // Seed a worklist with pads that have no parent. + SmallVector<std::pair<const Instruction *, int>, 8> Worklist; + for (const BasicBlock &BB : *Fn) { + const Instruction *FirstNonPHI = BB.getFirstNonPHI(); + const Value *ParentPad; + if (const auto *CPI = dyn_cast<CleanupPadInst>(FirstNonPHI)) + ParentPad = CPI->getParentPad(); + else if (const auto *CSI = dyn_cast<CatchSwitchInst>(FirstNonPHI)) + ParentPad = CSI->getParentPad(); + else + continue; + if (isa<ConstantTokenNone>(ParentPad)) + Worklist.emplace_back(FirstNonPHI, -1); + } + + // Use the worklist to visit all pads, from outer to inner. Record + // HandlerParentState for all pads. Record TryParentState only for catchpads + // that aren't the last on their catchswitch (setting all other entries' + // TryParentStates to an initial value of -1). This loop is also responsible + // for setting the EHPadStateMap entry for all catchpads, cleanuppads, and + // catchswitches. + while (!Worklist.empty()) { + const Instruction *Pad; + int HandlerParentState; + std::tie(Pad, HandlerParentState) = Worklist.pop_back_val(); + + if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) { + // Create the entry for this cleanup with the appropriate handler + // properties. Finaly and fault handlers are distinguished by arity. + ClrHandlerType HandlerType = + (Cleanup->getNumArgOperands() ? ClrHandlerType::Fault + : ClrHandlerType::Finally); + int CleanupState = addClrEHHandler(FuncInfo, HandlerParentState, -1, + HandlerType, 0, Pad->getParent()); + // Queue any child EH pads on the worklist. + for (const User *U : Cleanup->users()) + if (const auto *I = dyn_cast<Instruction>(U)) + if (I->isEHPad()) + Worklist.emplace_back(I, CleanupState); + // Remember this pad's state. + FuncInfo.EHPadStateMap[Cleanup] = CleanupState; + } else { + // Walk the handlers of this catchswitch in reverse order since all but + // the last need to set the following one as its TryParentState. 
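+      //
+      // For example (hypothetical handlers): a catchswitch with handlers
+      // [A, B, C] is walked C, B, A, so C gets TryParentState -1 (updated in
+      // step two), B gets C's state, and A gets B's state.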
+ const auto *CatchSwitch = cast<CatchSwitchInst>(Pad); + int CatchState = -1, FollowerState = -1; + SmallVector<const BasicBlock *, 4> CatchBlocks(CatchSwitch->handlers()); + for (auto CBI = CatchBlocks.rbegin(), CBE = CatchBlocks.rend(); + CBI != CBE; ++CBI, FollowerState = CatchState) { + const BasicBlock *CatchBlock = *CBI; + // Create the entry for this catch with the appropriate handler + // properties. + const auto *Catch = cast<CatchPadInst>(CatchBlock->getFirstNonPHI()); + uint32_t TypeToken = static_cast<uint32_t>( + cast<ConstantInt>(Catch->getArgOperand(0))->getZExtValue()); + CatchState = + addClrEHHandler(FuncInfo, HandlerParentState, FollowerState, + ClrHandlerType::Catch, TypeToken, CatchBlock); + // Queue any child EH pads on the worklist. + for (const User *U : Catch->users()) + if (const auto *I = dyn_cast<Instruction>(U)) + if (I->isEHPad()) + Worklist.emplace_back(I, CatchState); + // Remember this catch's state. + FuncInfo.EHPadStateMap[Catch] = CatchState; + } + // Associate the catchswitch with the state of its first catch. + assert(CatchSwitch->getNumHandlers()); + FuncInfo.EHPadStateMap[CatchSwitch] = CatchState; + } + } + + // Step two: record the TryParentState of each state. For cleanuppads that + // don't have cleanuprets, we may need to infer this from their child pads, + // so visit pads in descendant-most to ancestor-most order. + for (auto Entry = FuncInfo.ClrEHUnwindMap.rbegin(), + End = FuncInfo.ClrEHUnwindMap.rend(); + Entry != End; ++Entry) { + const Instruction *Pad = + Entry->Handler.get<const BasicBlock *>()->getFirstNonPHI(); + // For most pads, the TryParentState is the state associated with the + // unwind dest of exceptional exits from it. + const BasicBlock *UnwindDest; + if (const auto *Catch = dyn_cast<CatchPadInst>(Pad)) { + // If a catch is not the last in its catchswitch, its TryParentState is + // the state associated with the next catch in the switch, even though + // that's not the unwind dest of exceptions escaping the catch. Those + // cases were already assigned a TryParentState in the first pass, so + // skip them. + if (Entry->TryParentState != -1) + continue; + // Otherwise, get the unwind dest from the catchswitch. + UnwindDest = Catch->getCatchSwitch()->getUnwindDest(); + } else { + const auto *Cleanup = cast<CleanupPadInst>(Pad); + UnwindDest = nullptr; + for (const User *U : Cleanup->users()) { + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { + // Common and unambiguous case -- cleanupret indicates cleanup's + // unwind dest. + UnwindDest = CleanupRet->getUnwindDest(); + break; + } + + // Get an unwind dest for the user + const BasicBlock *UserUnwindDest = nullptr; + if (auto *Invoke = dyn_cast<InvokeInst>(U)) { + UserUnwindDest = Invoke->getUnwindDest(); + } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(U)) { + UserUnwindDest = CatchSwitch->getUnwindDest(); + } else if (auto *ChildCleanup = dyn_cast<CleanupPadInst>(U)) { + int UserState = FuncInfo.EHPadStateMap[ChildCleanup]; + int UserUnwindState = + FuncInfo.ClrEHUnwindMap[UserState].TryParentState; + if (UserUnwindState != -1) + UserUnwindDest = FuncInfo.ClrEHUnwindMap[UserUnwindState] + .Handler.get<const BasicBlock *>(); + } + + // Not having an unwind dest for this user might indicate that it + // doesn't unwind, so can't be taken as proof that the cleanup itself + // may unwind to caller (see e.g. SimplifyUnreachable and + // RemoveUnwindEdge). 
+ if (!UserUnwindDest) + continue; + + // Now we have an unwind dest for the user, but we need to see if it + // unwinds all the way out of the cleanup or if it stays within it. + const Instruction *UserUnwindPad = UserUnwindDest->getFirstNonPHI(); + const Value *UserUnwindParent; + if (auto *CSI = dyn_cast<CatchSwitchInst>(UserUnwindPad)) + UserUnwindParent = CSI->getParentPad(); + else + UserUnwindParent = + cast<CleanupPadInst>(UserUnwindPad)->getParentPad(); + + // The unwind stays within the cleanup iff it targets a child of the + // cleanup. + if (UserUnwindParent == Cleanup) + continue; + + // This unwind exits the cleanup, so its dest is the cleanup's dest. + UnwindDest = UserUnwindDest; + break; + } + } + + // Record the state of the unwind dest as the TryParentState. + int UnwindDestState; + + // If UnwindDest is null at this point, either the pad in question can + // be exited by unwind to caller, or it cannot be exited by unwind. In + // either case, reporting such cases as unwinding to caller is correct. + // This can lead to EH tables that "look strange" -- if this pad's is in + // a parent funclet which has other children that do unwind to an enclosing + // pad, the try region for this pad will be missing the "duplicate" EH + // clause entries that you'd expect to see covering the whole parent. That + // should be benign, since the unwind never actually happens. If it were + // an issue, we could add a subsequent pass that pushes unwind dests down + // from parents that have them to children that appear to unwind to caller. + if (!UnwindDest) { + UnwindDestState = -1; + } else { + UnwindDestState = FuncInfo.EHPadStateMap[UnwindDest->getFirstNonPHI()]; + } + + Entry->TryParentState = UnwindDestState; + } + + // Step three: transfer information from pads to invokes. + calculateStateNumbersForInvokes(Fn, FuncInfo); +} + +void WinEHPrepare::colorFunclets(Function &F) { + BlockColors = colorEHFunclets(F); + + // Invert the map from BB to colors to color to BBs. + for (BasicBlock &BB : F) { + ColorVector &Colors = BlockColors[&BB]; + for (BasicBlock *Color : Colors) + FuncletBlocks[Color].push_back(&BB); + } +} + +void llvm::calculateCatchReturnSuccessorColors(const Function *Fn, + WinEHFuncInfo &FuncInfo) { + for (const BasicBlock &BB : *Fn) { + const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator()); + if (!CatchRet) + continue; + // A 'catchret' returns to the outer scope's color. + Value *ParentPad = CatchRet->getParentPad(); + const BasicBlock *Color; + if (isa<ConstantTokenNone>(ParentPad)) + Color = &Fn->getEntryBlock(); + else + Color = cast<Instruction>(ParentPad)->getParent(); + // Record the catchret successor's funclet membership. + FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color; + } +} + +void WinEHPrepare::demotePHIsOnFunclets(Function &F) { + // Strip PHI nodes off of EH pads. + SmallVector<PHINode *, 16> PHINodes; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + BasicBlock *BB = &*FI++; + if (!BB->isEHPad()) + continue; + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { + Instruction *I = &*BI++; + auto *PN = dyn_cast<PHINode>(I); + // Stop at the first non-PHI. 
+ if (!PN) + break; + + AllocaInst *SpillSlot = insertPHILoads(PN, F); + if (SpillSlot) + insertPHIStores(PN, SpillSlot); + + PHINodes.push_back(PN); + } + } + + for (auto *PN : PHINodes) { + // There may be lingering uses on other EH PHIs being removed + PN->replaceAllUsesWith(UndefValue::get(PN->getType())); + PN->eraseFromParent(); + } +} + +void WinEHPrepare::cloneCommonBlocks(Function &F) { + // We need to clone all blocks which belong to multiple funclets. Values are + // remapped throughout the funclet to propogate both the new instructions + // *and* the new basic blocks themselves. + for (auto &Funclets : FuncletBlocks) { + BasicBlock *FuncletPadBB = Funclets.first; + std::vector<BasicBlock *> &BlocksInFunclet = Funclets.second; + Value *FuncletToken; + if (FuncletPadBB == &F.getEntryBlock()) + FuncletToken = ConstantTokenNone::get(F.getContext()); + else + FuncletToken = FuncletPadBB->getFirstNonPHI(); + + std::vector<std::pair<BasicBlock *, BasicBlock *>> Orig2Clone; + ValueToValueMapTy VMap; + for (BasicBlock *BB : BlocksInFunclet) { + ColorVector &ColorsForBB = BlockColors[BB]; + // We don't need to do anything if the block is monochromatic. + size_t NumColorsForBB = ColorsForBB.size(); + if (NumColorsForBB == 1) + continue; + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Cloning block \'" << BB->getName() + << "\' for funclet \'" << FuncletPadBB->getName() + << "\'.\n"); + + // Create a new basic block and copy instructions into it! + BasicBlock *CBB = + CloneBasicBlock(BB, VMap, Twine(".for.", FuncletPadBB->getName())); + // Insert the clone immediately after the original to ensure determinism + // and to keep the same relative ordering of any funclet's blocks. + CBB->insertInto(&F, BB->getNextNode()); + + // Add basic block mapping. + VMap[BB] = CBB; + + // Record delta operations that we need to perform to our color mappings. + Orig2Clone.emplace_back(BB, CBB); + } + + // If nothing was cloned, we're done cloning in this funclet. + if (Orig2Clone.empty()) + continue; + + // Update our color mappings to reflect that one block has lost a color and + // another has gained a color. + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + + BlocksInFunclet.push_back(NewBlock); + ColorVector &NewColors = BlockColors[NewBlock]; + assert(NewColors.empty() && "A new block should only have one color!"); + NewColors.push_back(FuncletPadBB); + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Assigned color \'" << FuncletPadBB->getName() + << "\' to block \'" << NewBlock->getName() + << "\'.\n"); + + BlocksInFunclet.erase( + std::remove(BlocksInFunclet.begin(), BlocksInFunclet.end(), OldBlock), + BlocksInFunclet.end()); + ColorVector &OldColors = BlockColors[OldBlock]; + OldColors.erase( + std::remove(OldColors.begin(), OldColors.end(), FuncletPadBB), + OldColors.end()); + + DEBUG_WITH_TYPE("winehprepare-coloring", + dbgs() << " Removed color \'" << FuncletPadBB->getName() + << "\' from block \'" << OldBlock->getName() + << "\'.\n"); + } + + // Loop over all of the instructions in this funclet, fixing up operand + // references as we go. This uses VMap to do all the hard work. + for (BasicBlock *BB : BlocksInFunclet) + // Loop over all instructions, fixing each one as we find it... 
+ for (Instruction &I : *BB) + RemapInstruction(&I, VMap, + RF_IgnoreMissingEntries | RF_NoModuleLevelChanges); + + // Catchrets targeting cloned blocks need to be updated separately from + // the loop above because they are not in the current funclet. + SmallVector<CatchReturnInst *, 2> FixupCatchrets; + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + + FixupCatchrets.clear(); + for (BasicBlock *Pred : predecessors(OldBlock)) + if (auto *CatchRet = dyn_cast<CatchReturnInst>(Pred->getTerminator())) + if (CatchRet->getParentPad() == FuncletToken) + FixupCatchrets.push_back(CatchRet); + + for (CatchReturnInst *CatchRet : FixupCatchrets) + CatchRet->setSuccessor(NewBlock); + } + + auto UpdatePHIOnClonedBlock = [&](PHINode *PN, bool IsForOldBlock) { + unsigned NumPreds = PN->getNumIncomingValues(); + for (unsigned PredIdx = 0, PredEnd = NumPreds; PredIdx != PredEnd; + ++PredIdx) { + BasicBlock *IncomingBlock = PN->getIncomingBlock(PredIdx); + bool EdgeTargetsFunclet; + if (auto *CRI = + dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) { + EdgeTargetsFunclet = (CRI->getParentPad() == FuncletToken); + } else { + ColorVector &IncomingColors = BlockColors[IncomingBlock]; + assert(!IncomingColors.empty() && "Block not colored!"); + assert((IncomingColors.size() == 1 || + llvm::all_of(IncomingColors, + [&](BasicBlock *Color) { + return Color != FuncletPadBB; + })) && + "Cloning should leave this funclet's blocks monochromatic"); + EdgeTargetsFunclet = (IncomingColors.front() == FuncletPadBB); + } + if (IsForOldBlock != EdgeTargetsFunclet) + continue; + PN->removeIncomingValue(IncomingBlock, /*DeletePHIIfEmpty=*/false); + // Revisit the next entry. + --PredIdx; + --PredEnd; + } + }; + + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + for (Instruction &OldI : *OldBlock) { + auto *OldPN = dyn_cast<PHINode>(&OldI); + if (!OldPN) + break; + UpdatePHIOnClonedBlock(OldPN, /*IsForOldBlock=*/true); + } + for (Instruction &NewI : *NewBlock) { + auto *NewPN = dyn_cast<PHINode>(&NewI); + if (!NewPN) + break; + UpdatePHIOnClonedBlock(NewPN, /*IsForOldBlock=*/false); + } + } + + // Check to see if SuccBB has PHI nodes. If so, we need to add entries to + // the PHI nodes for NewBB now. + for (auto &BBMapping : Orig2Clone) { + BasicBlock *OldBlock = BBMapping.first; + BasicBlock *NewBlock = BBMapping.second; + for (BasicBlock *SuccBB : successors(NewBlock)) { + for (Instruction &SuccI : *SuccBB) { + auto *SuccPN = dyn_cast<PHINode>(&SuccI); + if (!SuccPN) + break; + + // Ok, we have a PHI node. Figure out what the incoming value was for + // the OldBlock. + int OldBlockIdx = SuccPN->getBasicBlockIndex(OldBlock); + if (OldBlockIdx == -1) + break; + Value *IV = SuccPN->getIncomingValue(OldBlockIdx); + + // Remap the value if necessary. + if (auto *Inst = dyn_cast<Instruction>(IV)) { + ValueToValueMapTy::iterator I = VMap.find(Inst); + if (I != VMap.end()) + IV = I->second; + } + + SuccPN->addIncoming(IV, NewBlock); + } + } + } + + for (ValueToValueMapTy::value_type VT : VMap) { + // If there were values defined in BB that are used outside the funclet, + // then we now have to update all uses of the value to use either the + // original value, the cloned value, or some PHI derived value. This can + // require arbitrary PHI insertion, of which we are prepared to do, clean + // these up now. 
+ SmallVector<Use *, 16> UsesToRename; + + auto *OldI = dyn_cast<Instruction>(const_cast<Value *>(VT.first)); + if (!OldI) + continue; + auto *NewI = cast<Instruction>(VT.second); + // Scan all uses of this instruction to see if it is used outside of its + // funclet, and if so, record them in UsesToRename. + for (Use &U : OldI->uses()) { + Instruction *UserI = cast<Instruction>(U.getUser()); + BasicBlock *UserBB = UserI->getParent(); + ColorVector &ColorsForUserBB = BlockColors[UserBB]; + assert(!ColorsForUserBB.empty()); + if (ColorsForUserBB.size() > 1 || + *ColorsForUserBB.begin() != FuncletPadBB) + UsesToRename.push_back(&U); + } + + // If there are no uses outside the block, we're done with this + // instruction. + if (UsesToRename.empty()) + continue; + + // We found a use of OldI outside of the funclet. Rename all uses of OldI + // that are outside its funclet to be uses of the appropriate PHI node + // etc. + SSAUpdater SSAUpdate; + SSAUpdate.Initialize(OldI->getType(), OldI->getName()); + SSAUpdate.AddAvailableValue(OldI->getParent(), OldI); + SSAUpdate.AddAvailableValue(NewI->getParent(), NewI); + + while (!UsesToRename.empty()) + SSAUpdate.RewriteUseAfterInsertions(*UsesToRename.pop_back_val()); + } + } +} + +void WinEHPrepare::removeImplausibleInstructions(Function &F) { + // Remove implausible terminators and replace them with UnreachableInst. + for (auto &Funclet : FuncletBlocks) { + BasicBlock *FuncletPadBB = Funclet.first; + std::vector<BasicBlock *> &BlocksInFunclet = Funclet.second; + Instruction *FirstNonPHI = FuncletPadBB->getFirstNonPHI(); + auto *FuncletPad = dyn_cast<FuncletPadInst>(FirstNonPHI); + auto *CatchPad = dyn_cast_or_null<CatchPadInst>(FuncletPad); + auto *CleanupPad = dyn_cast_or_null<CleanupPadInst>(FuncletPad); + + for (BasicBlock *BB : BlocksInFunclet) { + for (Instruction &I : *BB) { + CallSite CS(&I); + if (!CS) + continue; + + Value *FuncletBundleOperand = nullptr; + if (auto BU = CS.getOperandBundle(LLVMContext::OB_funclet)) + FuncletBundleOperand = BU->Inputs.front(); + + if (FuncletBundleOperand == FuncletPad) + continue; + + // Skip call sites which are nounwind intrinsics. + auto *CalledFn = + dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && CS.doesNotThrow()) + continue; + + // This call site was not part of this funclet, remove it. + if (CS.isInvoke()) { + // Remove the unwind edge if it was an invoke. + removeUnwindEdge(BB); + // Get a pointer to the new call. + BasicBlock::iterator CallI = + std::prev(BB->getTerminator()->getIterator()); + auto *CI = cast<CallInst>(&*CallI); + changeToUnreachable(CI, /*UseLLVMTrap=*/false); + } else { + changeToUnreachable(&I, /*UseLLVMTrap=*/false); + } + + // There are no more instructions in the block (except for unreachable), + // we are done. + break; + } + + TerminatorInst *TI = BB->getTerminator(); + // CatchPadInst and CleanupPadInst can't transfer control to a ReturnInst. + bool IsUnreachableRet = isa<ReturnInst>(TI) && FuncletPad; + // The token consumed by a CatchReturnInst must match the funclet token. + bool IsUnreachableCatchret = false; + if (auto *CRI = dyn_cast<CatchReturnInst>(TI)) + IsUnreachableCatchret = CRI->getCatchPad() != CatchPad; + // The token consumed by a CleanupReturnInst must match the funclet token. 
+ bool IsUnreachableCleanupret = false; + if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) + IsUnreachableCleanupret = CRI->getCleanupPad() != CleanupPad; + if (IsUnreachableRet || IsUnreachableCatchret || + IsUnreachableCleanupret) { + changeToUnreachable(TI, /*UseLLVMTrap=*/false); + } else if (isa<InvokeInst>(TI)) { + if (Personality == EHPersonality::MSVC_CXX && CleanupPad) { + // Invokes within a cleanuppad for the MSVC++ personality never + // transfer control to their unwind edge: the personality will + // terminate the program. + removeUnwindEdge(BB); + } + } + } + } +} + +void WinEHPrepare::cleanupPreparedFunclets(Function &F) { + // Clean-up some of the mess we made by removing useles PHI nodes, trivial + // branches, etc. + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE;) { + BasicBlock *BB = &*FI++; + SimplifyInstructionsInBlock(BB); + ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true); + MergeBlockIntoPredecessor(BB); + } + + // We might have some unreachable blocks after cleaning up some impossible + // control flow. + removeUnreachableBlocks(F); +} + +void WinEHPrepare::verifyPreparedFunclets(Function &F) { + for (BasicBlock &BB : F) { + size_t NumColors = BlockColors[&BB].size(); + assert(NumColors == 1 && "Expected monochromatic BB!"); + if (NumColors == 0) + report_fatal_error("Uncolored BB!"); + if (NumColors > 1) + report_fatal_error("Multicolor BB!"); + assert((DisableDemotion || !(BB.isEHPad() && isa<PHINode>(BB.begin()))) && + "EH Pad still has a PHI!"); + } +} + +bool WinEHPrepare::prepareExplicitEH(Function &F) { + // Remove unreachable blocks. It is not valuable to assign them a color and + // their existence can trick us into thinking values are alive when they are + // not. + removeUnreachableBlocks(F); + + // Determine which blocks are reachable from which funclet entries. + colorFunclets(F); + + cloneCommonBlocks(F); + + if (!DisableDemotion) + demotePHIsOnFunclets(F); + + if (!DisableCleanups) { + DEBUG(verifyFunction(F)); + removeImplausibleInstructions(F); + + DEBUG(verifyFunction(F)); + cleanupPreparedFunclets(F); + } + + DEBUG(verifyPreparedFunclets(F)); + // Recolor the CFG to verify that all is well. + DEBUG(colorFunclets(F)); + DEBUG(verifyPreparedFunclets(F)); + + BlockColors.clear(); + FuncletBlocks.clear(); + + return true; +} + +// TODO: Share loads when one use dominates another, or when a catchpad exit +// dominates uses (needs dominators). +AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) { + BasicBlock *PHIBlock = PN->getParent(); + AllocaInst *SpillSlot = nullptr; + Instruction *EHPad = PHIBlock->getFirstNonPHI(); + + if (!isa<TerminatorInst>(EHPad)) { + // If the EHPad isn't a terminator, then we can insert a load in this block + // that will dominate all uses. + SpillSlot = new AllocaInst(PN->getType(), nullptr, + Twine(PN->getName(), ".wineh.spillslot"), + &F.getEntryBlock().front()); + Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"), + &*PHIBlock->getFirstInsertionPt()); + PN->replaceAllUsesWith(V); + return SpillSlot; + } + + // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert + // loads of the slot before every use. + DenseMap<BasicBlock *, Value *> Loads; + for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end(); + UI != UE;) { + Use &U = *UI++; + auto *UsingInst = cast<Instruction>(U.getUser()); + if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) { + // Use is on an EH pad phi. 
+// TODO: Share loads when one use dominates another, or when a catchpad exit
+// dominates uses (needs dominators).
+AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
+  BasicBlock *PHIBlock = PN->getParent();
+  AllocaInst *SpillSlot = nullptr;
+  Instruction *EHPad = PHIBlock->getFirstNonPHI();
+
+  if (!isa<TerminatorInst>(EHPad)) {
+    // If the EHPad isn't a terminator, then we can insert a load in this block
+    // that will dominate all uses.
+    SpillSlot = new AllocaInst(PN->getType(), nullptr,
+                               Twine(PN->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+    Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+                            &*PHIBlock->getFirstInsertionPt());
+    PN->replaceAllUsesWith(V);
+    return SpillSlot;
+  }
+
+  // Otherwise, we have a PHI on a terminator EHPad, and we give up and insert
+  // loads of the slot before every use.
+  DenseMap<BasicBlock *, Value *> Loads;
+  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
+       UI != UE;) {
+    Use &U = *UI++;
+    auto *UsingInst = cast<Instruction>(U.getUser());
+    if (isa<PHINode>(UsingInst) && UsingInst->getParent()->isEHPad()) {
+      // Use is on an EH pad phi. Leave it alone; we'll insert loads and
+      // stores for it separately.
+      continue;
+    }
+    replaceUseWithLoad(PN, U, SpillSlot, Loads, F);
+  }
+  return SpillSlot;
+}
+
+// TODO: improve store placement. Inserting at def is probably good, but need
+// to be careful not to introduce interfering stores (needs liveness analysis).
+// TODO: identify related phi nodes that can share spill slots, and share them
+// (also needs liveness).
+void WinEHPrepare::insertPHIStores(PHINode *OriginalPHI,
+                                   AllocaInst *SpillSlot) {
+  // Use a worklist of (Block, Value) pairs -- the given Value needs to be
+  // stored to the spill slot by the end of the given Block.
+  SmallVector<std::pair<BasicBlock *, Value *>, 4> Worklist;
+
+  Worklist.push_back({OriginalPHI->getParent(), OriginalPHI});
+
+  while (!Worklist.empty()) {
+    BasicBlock *EHBlock;
+    Value *InVal;
+    std::tie(EHBlock, InVal) = Worklist.pop_back_val();
+
+    PHINode *PN = dyn_cast<PHINode>(InVal);
+    if (PN && PN->getParent() == EHBlock) {
+      // The value is defined by another PHI we need to remove, with no room to
+      // insert a store after the PHI, so each predecessor needs to store its
+      // incoming value.
+      for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
+        Value *PredVal = PN->getIncomingValue(i);
+
+        // Undef can safely be skipped.
+        if (isa<UndefValue>(PredVal))
+          continue;
+
+        insertPHIStore(PN->getIncomingBlock(i), PredVal, SpillSlot, Worklist);
+      }
+    } else {
+      // We need to store InVal, which dominates EHBlock, but can't put a store
+      // in EHBlock, so need to put stores in each predecessor.
+      for (BasicBlock *PredBlock : predecessors(EHBlock)) {
+        insertPHIStore(PredBlock, InVal, SpillSlot, Worklist);
+      }
+    }
+  }
+}
+
+void WinEHPrepare::insertPHIStore(
+    BasicBlock *PredBlock, Value *PredVal, AllocaInst *SpillSlot,
+    SmallVectorImpl<std::pair<BasicBlock *, Value *>> &Worklist) {
+
+  if (PredBlock->isEHPad() &&
+      isa<TerminatorInst>(PredBlock->getFirstNonPHI())) {
+    // Pred is unsplittable, so we need to queue it on the worklist.
+    Worklist.push_back({PredBlock, PredVal});
+    return;
+  }
+
+  // Otherwise, insert the store at the end of the basic block.
+  new StoreInst(PredVal, SpillSlot, PredBlock->getTerminator());
+}
+
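+// A single predecessor can reach a PHI through more than one edge, e.g. a
+// switch with several cases branching to the same successor (illustrative
+// IR, names made up):
+//   bb:
+//     switch i32 %sel, label %other [ i32 0, label %phi.block
+//                                     i32 1, label %phi.block ]
+//   phi.block:
+//     %p = phi i32 [ %v, %bb ], [ %v, %bb ]
+// All incoming operands from %bb must be rewritten to the same load;
+// otherwise the PHI would see different values for one predecessor, which is
+// invalid SSA. replaceUseWithLoad therefore caches one load per incoming
+// block in the Loads map.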
+void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
+                                      DenseMap<BasicBlock *, Value *> &Loads,
+                                      Function &F) {
+  // Lazily create the spill slot.
+  if (!SpillSlot)
+    SpillSlot = new AllocaInst(V->getType(), nullptr,
+                               Twine(V->getName(), ".wineh.spillslot"),
+                               &F.getEntryBlock().front());
+
+  auto *UsingInst = cast<Instruction>(U.getUser());
+  if (auto *UsingPHI = dyn_cast<PHINode>(UsingInst)) {
+    // If this is a PHI node, we can't insert a load of the value before
+    // the use. Instead insert the load in the predecessor block
+    // corresponding to the incoming value.
+    //
+    // Note that if there are multiple edges from a basic block to this
+    // PHI node, we cannot have multiple loads. The problem is that
+    // the resulting PHI node will have multiple values (from each load)
+    // coming in from the same block, which is illegal SSA form.
+    // For this reason, we keep track of and reuse loads we insert.
+    BasicBlock *IncomingBlock = UsingPHI->getIncomingBlock(U);
+    if (auto *CatchRet =
+            dyn_cast<CatchReturnInst>(IncomingBlock->getTerminator())) {
+      // Putting a load above a catchret and use on the phi would still leave
+      // a cross-funclet def/use. We need to split the edge, change the
+      // catchret to target the new block, and put the load there.
+      BasicBlock *PHIBlock = UsingInst->getParent();
+      BasicBlock *NewBlock = SplitEdge(IncomingBlock, PHIBlock);
+      // SplitEdge gives us:
+      //   IncomingBlock:
+      //     ...
+      //     br label %NewBlock
+      //   NewBlock:
+      //     catchret label %PHIBlock
+      // But we need:
+      //   IncomingBlock:
+      //     ...
+      //     catchret label %NewBlock
+      //   NewBlock:
+      //     br label %PHIBlock
+      // So move the terminators to each others' blocks and swap their
+      // successors.
+      BranchInst *Goto = cast<BranchInst>(IncomingBlock->getTerminator());
+      Goto->removeFromParent();
+      CatchRet->removeFromParent();
+      IncomingBlock->getInstList().push_back(CatchRet);
+      NewBlock->getInstList().push_back(Goto);
+      Goto->setSuccessor(0, PHIBlock);
+      CatchRet->setSuccessor(NewBlock);
+      // Update the color mapping for the newly split edge.
+      ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
+      BlockColors[NewBlock] = ColorsForPHIBlock;
+      for (BasicBlock *FuncletPad : ColorsForPHIBlock)
+        FuncletBlocks[FuncletPad].push_back(NewBlock);
+      // Treat the new block as incoming for load insertion.
+      IncomingBlock = NewBlock;
+    }
+    Value *&Load = Loads[IncomingBlock];
+    // Insert the load into the predecessor block.
+    if (!Load)
+      Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                          /*Volatile=*/false, IncomingBlock->getTerminator());
+
+    U.set(Load);
+  } else {
+    // Reload right before the old use.
+    auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
+                              /*Volatile=*/false, UsingInst);
+    U.set(Load);
+  }
+}
+
+void WinEHFuncInfo::addIPToStateRange(const InvokeInst *II,
+                                      MCSymbol *InvokeBegin,
+                                      MCSymbol *InvokeEnd) {
+  assert(InvokeStateMap.count(II) &&
+         "should get invoke with precomputed state");
+  LabelToStateMap[InvokeBegin] = std::make_pair(InvokeStateMap[II], InvokeEnd);
+}
+
+WinEHFuncInfo::WinEHFuncInfo() {}
diff --git a/contrib/llvm/lib/CodeGen/module.modulemap b/contrib/llvm/lib/CodeGen/module.modulemap
new file mode 100644
index 0000000..d4f68bc
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/module.modulemap
@@ -0,0 +1 @@
+module CodeGen { requires cplusplus umbrella "." module * { export * } }